1 # {{{ BEGIN BPS TAGGED BLOCK
5 # This software is Copyright (c) 1996-2004 Best Practical Solutions, LLC
6 # <jesse@bestpractical.com>
8 # (Except where explicitly superseded by other copyright notices)
13 # This work is made available to you under the terms of Version 2 of
14 # the GNU General Public License. A copy of that license should have
15 # been provided with this software, but in any event can be snarfed
18 # This work is distributed in the hope that it will be useful, but
19 # WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # General Public License for more details.
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 # CONTRIBUTION SUBMISSION POLICY:
30 # (The following paragraph is not intended to limit the rights granted
31 # to you to modify and distribute this software under the terms of
32 # the GNU General Public License and is only of importance to you if
33 # you choose to contribute your changes and enhancements to the
34 # community by submitting them to Best Practical Solutions, LLC.)
36 # By intentionally submitting any modifications, corrections or
37 # derivatives to this work, or any other work intended for use with
38 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
39 # you are the copyright holder for those contributions and you grant
40 # Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
41 # royalty-free, perpetual, license to use, copy, create derivative
42 # works based on those contributions, and sublicense and distribute
43 # those contributions and any derivatives thereof.
45 # }}} END BPS TAGGED BLOCK
48 RT::I18N - a base class for localization of RT
55 use Locale::Maketext 1.04;
56 use Locale::Maketext::Lexicon 0.25;
57 use base ('Locale::Maketext::Fuzzy');
58 use vars qw( %Lexicon );
60 #If we're running on 5.6, we desperately need Encode::compat. But if we're on 5.8, we don't really need it.
61 BEGIN { if ($] < 5.007001) {
62 require Encode::compat;
69 # I decree that this project's first language is English.
72 'TEST_STRING' => 'Concrete Mixer',
74 '__Content-Type' => 'text/plain; charset=utf-8',
77 # That means that lookup failures can't happen -- if we get as far
78 # as looking for something in this lexicon, and we don't find it,
79 # then automagically set $Lexicon{$key} = $key, before possibly
82 # The exception is keys that start with "_" -- they aren't auto-makeable.
89 Initializes the lexicons used for localization.
101 # Load language-specific functions
102 foreach my $language ( glob(substr(__FILE__, 0, -3) . "/*.pm")) {
103 if ($language =~ /^([-\w.\/\\~:]+)$/) {
107 warn("$language is tainted. not loading");
111 my @lang = @RT::LexiconLanguages;
112 @lang = ('*') unless @lang;
114 # Acquire all .po files and iterate them into lexicons
115 Locale::Maketext::Lexicon->import({
118 Gettext => (substr(__FILE__, 0, -3) . "/$_.po"),
119 Gettext => "$RT::LocalLexiconPath/*/$_.po",
129 Returns the encoding of the current lexicon, as yanked out of __ContentType's "charset" field.
130 If it can't find anything, it returns 'ISO-8859-1'
134 ok(my $chinese = RT::I18N->get_handle('zh_tw'));
135 ok(UNIVERSAL::can($chinese, 'maketext'));
136 ok($chinese->maketext('__Content-Type') =~ /utf-8/i, "Found the utf-8 charset for traditional chinese in the string ".$chinese->maketext('__Content-Type'));
137 ok($chinese->encoding eq 'utf-8', "The encoding is 'utf-8' -".$chinese->encoding);
139 ok(my $en = RT::I18N->get_handle('en'));
140 ok(UNIVERSAL::can($en, 'maketext'));
141 ok($en->encoding eq 'utf-8', "The encoding ".$en->encoding." is 'utf-8'");
149 sub encoding { 'utf-8' }
151 # {{{ SetMIMEEntityToUTF8
153 =head2 SetMIMEEntityToUTF8 $entity
155 An utility method which will try to convert entity body into utf8.
156 It's now a wrap-up of SetMIMEEntityToEncoding($entity, 'utf-8').
160 sub SetMIMEEntityToUTF8 {
161 RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
166 # {{{ SetMIMEEntityToEncoding
168 =head2 SetMIMEEntityToEncoding $entity, $encoding
170 An utility method which will try to convert entity body into specified
171 charset encoding (encoded as octets, *not* unicode-strings). It will
172 iterate all the entities in $entity, and try to convert each one into
173 specified charset if whose Content-Type is 'text/plain'.
175 This method doesn't return anything meaningful.
179 sub SetMIMEEntityToEncoding {
180 my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
182 #if ( $entity->is_multipart ) {
183 #$RT::Logger->crit("This entity is a multipart " . $entity->head->as_string);
184 SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
187 my $charset = _FindOrGuessCharset($entity) or return;
188 # one and only normalization
189 $charset = 'utf-8' if $charset =~ /^utf-?8$/i;
190 $enc = 'utf-8' if $enc =~ /^utf-?8$/i;
192 SetMIMEHeadToEncoding(
194 _FindOrGuessCharset($entity, 1) => $enc,
198 my $head = $entity->head;
200 # convert at least MIME word encoded attachment filename
201 foreach my $attr (qw(content-type.name content-disposition.filename)) {
202 if ( my $name = $head->mime_attr($attr) and !$preserve_words ) {
203 $head->mime_attr( $attr => DecodeMIMEWordsToUTF8($name) );
207 # If this is a textual entity, we'd need to preserve its original encoding
208 $head->add( "X-RT-Original-Encoding" => $charset )
209 if $head->mime_attr('content-type.charset') or $head->mime_type =~ /^text/;
212 return unless ( $head->mime_type =~ qr{^(text/plain|message/rfc822)$}i );
215 my $body = $entity->bodyhandle;
217 if ( $enc ne $charset && $body) {
218 my @lines = $body->as_lines or return;
220 # {{{ Convert the body
222 $RT::Logger->debug("Converting '$charset' to '$enc' for ". $head->mime_type . " - ". $head->get('subject'));
224 # NOTE:: see the comments at the end of the sub.
225 Encode::_utf8_off( $lines[$_] ) foreach ( 0 .. $#lines );
226 Encode::from_to( $lines[$_], $charset => $enc ) for ( 0 .. $#lines );
230 $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" );
232 Encode::from_to( $lines[$_], 'iso-8859-1' => $enc ) foreach ( 0 .. $#lines );
235 $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
240 my $new_body = MIME::Body::InCore->new( \@lines );
242 # set up the new entity
243 $head->mime_attr( "content-type" => 'text/plain' )
244 unless ( $head->mime_attr("content-type") );
245 $head->mime_attr( "content-type.charset" => $enc );
246 $entity->bodyhandle($new_body);
250 # NOTES: Why Encode::_utf8_off before Encode::from_to
252 # All the strings in RT are utf-8 now. Quotes from Encode POD:
254 # [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
255 # ... The data in $octets must be encoded as octets and not as
256 # characters in Perl's internal format. ...
258 # Not turning off the UTF-8 flag in the string will prevent the string
263 # {{{ DecodeMIMEWordsToUTF8
265 =head2 DecodeMIMEWordsToUTF8 $raw
267 An utility method which mimics MIME::Words::decode_mimewords, but only
268 limited functionality. This function returns an utf-8 string.
270 It returns the decoded string, or the original string if it's not
271 encoded. Since the subroutine converts specified string into utf-8
272 charset, it should not alter a subject written in English.
274 Why not use MIME::Words directly? Because it fails in RT when I
275 tried. Maybe it's ok now.
279 sub DecodeMIMEWordsToUTF8 {
281 DecodeMIMEWordsToEncoding($str, 'utf-8');
284 sub DecodeMIMEWordsToEncoding {
289 @_ = $str =~ m/([^=]*)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/g;
291 return ($str) unless (@_);
295 my ($prefix, $charset, $encoding, $enc_str, $trailing) =
296 (shift, shift, shift, shift, shift);
298 $trailing =~ s/\s?\t?$//; # Observed from Outlook Express
300 if ($encoding eq 'Q' or $encoding eq 'q') {
301 use MIME::QuotedPrint;
302 $enc_str =~ tr/_/ /; # Observed from Outlook Express
303 $enc_str = decode_qp($enc_str);
304 } elsif ($encoding eq 'B' or $encoding eq 'b') {
306 $enc_str = decode_base64($enc_str);
308 $RT::Logger->warning("RT::I18N::DecodeMIMEWordsToCharset got a " .
309 "strange encoding: $encoding.");
312 # now we have got a decoded subject, try to convert into the encoding
313 unless ($charset eq $enc) {
314 eval { Encode::from_to($enc_str, $charset, $enc) };
316 $charset = _GuessCharset( $enc_str );
317 Encode::from_to($enc_str, $charset, $enc);
321 $str .= $prefix . $enc_str . $trailing;
329 # {{{ _FindOrGuessCharset
331 =head2 _FindOrGuessCharset MIME::Entity, $head_only
333 When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that, will use Encode::Guess to try to figure it out
335 If $head_only is true, only guesses charset for head parts. This is because header's encoding (e.g. filename="...") may be different from that of body's.
339 sub _FindOrGuessCharset {
341 my $head_only = shift;
342 my $head = $entity->head;
344 if ($head->mime_attr("content-type.charset")) {
345 return $head->mime_attr("content-type.charset");
348 if ( !$head_only and $head->mime_type =~ m{^text/}) {
349 my $body = $entity->bodyhandle or return;
350 return _GuessCharset( $body->as_string );
353 # potentially binary data -- don't guess the body
354 return _GuessCharset( $head->as_string );
363 =head2 _GuessCharset STRING
365 use Encode::Guess to try to figure it out the string's encoding.
370 my $fallback = 'iso-8859-1';
373 if ( @RT::EmailInputEncodings and eval { require Encode::Guess; 1 } ) {
374 Encode::Guess->set_suspects(@RT::EmailInputEncodings);
375 my $decoder = Encode::Guess->guess( $_[0] );
377 if ( ref $decoder ) {
378 $charset = $decoder->name;
379 $RT::Logger->debug("Guessed encoding: $charset");
382 elsif ($decoder =~ /(\S+ or .+)/) {
383 my %matched = map { $_ => 1 } split(/ or /, $1);
384 return 'utf-8' if $matched{'utf8'}; # one and only normalization
386 foreach my $suspect (@RT::EmailInputEncodings) {
387 next unless $matched{$suspect};
388 $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
394 $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
398 $RT::Logger->warning("Cannot Encode::Guess; fallback to $fallback");
401 return($charset || $fallback);
406 # {{{ SetMIMEHeadToEncoding
408 =head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
410 Converts a MIME Head from one encoding to another. This totally violates the RFC.
411 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
417 sub SetMIMEHeadToEncoding {
418 my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
420 $charset = 'utf-8' if $charset eq 'utf8';
421 $enc = 'utf-8' if $enc eq 'utf8';
423 return if $charset eq $enc and $preserve_words;
425 foreach my $tag ( $head->tags ) {
426 my @values = $head->get_all($tag);
428 foreach my $value (@values) {
429 if ( $charset ne $enc ) {
432 Encode::_utf8_off($value);
433 Encode::from_to( $value, $charset => $enc );
436 $RT::Logger->error( "Encoding error: " . $@
437 . " defaulting to ISO-8859-1 -> UTF-8" );
438 eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) };
440 $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
444 $value = DecodeMIMEWordsToEncoding( $value, $enc ) unless $preserve_words;
445 $head->add( $tag, $value );
452 eval "require RT::I18N_Vendor";
453 die $@ if ($@ && $@ !~ qr{^Can't locate RT/I18N_Vendor.pm});
454 eval "require RT::I18N_Local";
455 die $@ if ($@ && $@ !~ qr{^Can't locate RT/I18N_Local.pm});