X-Git-Url: http://git.freeside.biz/gitweb/?a=blobdiff_plain;f=rt%2Flib%2FRT%2FI18N.pm;h=9e89ecd47700333f541caae20e27690fed0ea171;hb=90edd8a914fd484e649fb0aa051dce7927bd6881;hp=9d0b2b5a135aa1d0dca583233d19a7ee61786829;hpb=8103c1fc1b2c27a6855feadf26f91b980a54bc52;p=freeside.git diff --git a/rt/lib/RT/I18N.pm b/rt/lib/RT/I18N.pm index 9d0b2b5a1..9e89ecd47 100644 --- a/rt/lib/RT/I18N.pm +++ b/rt/lib/RT/I18N.pm @@ -1,40 +1,40 @@ # BEGIN BPS TAGGED BLOCK {{{ -# +# # COPYRIGHT: -# -# This software is Copyright (c) 1996-2007 Best Practical Solutions, LLC -# -# +# +# This software is Copyright (c) 1996-2011 Best Practical Solutions, LLC +# +# # (Except where explicitly superseded by other copyright notices) -# -# +# +# # LICENSE: -# +# # This work is made available to you under the terms of Version 2 of # the GNU General Public License. A copy of that license should have # been provided with this software, but in any event can be snarfed # from www.gnu.org. -# +# # This work is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301 or visit their web page on the internet at -# http://www.gnu.org/copyleft/gpl.html. -# -# +# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html. +# +# # CONTRIBUTION SUBMISSION POLICY: -# +# # (The following paragraph is not intended to limit the rights granted # to you to modify and distribute this software under the terms of # the GNU General Public License and is only of importance to you if # you choose to contribute your changes and enhancements to the # community by submitting them to Best Practical Solutions, LLC.) -# +# # By intentionally submitting any modifications, corrections or # derivatives to this work, or any other work intended for use with # Request Tracker, to Best Practical Solutions, LLC, you confirm that @@ -43,8 +43,9 @@ # royalty-free, perpetual, license to use, copy, create derivative # works based on those contributions, and sublicense and distribute # those contributions and any derivatives thereof. -# +# # END BPS TAGGED BLOCK }}} + =head1 NAME RT::I18N - a base class for localization of RT @@ -86,62 +87,70 @@ our %Lexicon = ( Initializes the lexicons used for localization. -=begin testing - -use_ok (RT::I18N); -ok(RT::I18N->Init); - -=end testing =cut sub Init { require File::Glob; + my @lang = RT->Config->Get('LexiconLanguages'); + @lang = ('*') unless @lang; + + # load default functions + require substr(__FILE__, 0, -3) . '/i_default.pm'; + # Load language-specific functions - foreach my $language ( File::Glob::bsd_glob(substr(__FILE__, 0, -3) . "/*.pm")) { - if ($language =~ /^([-\w\s.\/\\~:]+)$/) { - require $1; + foreach my $file ( File::Glob::bsd_glob(substr(__FILE__, 0, -3) . "/*.pm") ) { + unless ( $file =~ /^([-\w\s\.\/\\~:]+)$/ ) { + warn("$file is tainted. not loading"); + next; } - else { - warn("$language is tainted. not loading"); - } + $file = $1; + + my ($lang) = ($file =~ /([^\\\/]+?)\.pm$/); + next unless grep $_ eq '*' || $_ eq $lang, @lang; + require $file; } - my @lang = @RT::LexiconLanguages; - @lang = ('*') unless @lang; + my %import; + foreach my $l ( @lang ) { + $import{$l} = [ + Gettext => (substr(__FILE__, 0, -3) . "/$l.po"), + Gettext => "$RT::LocalLexiconPath/*/$l.po", + Gettext => "$RT::LocalLexiconPath/$l.po", + ]; + push @{ $import{$l} }, map {(Gettext => "$_/$l.po")} RT->PluginDirs('po'); + } # Acquire all .po files and iterate them into lexicons - Locale::Maketext::Lexicon->import({ - _decode => 1, map { - $_ => [ - Gettext => (substr(__FILE__, 0, -3) . "/$_.po"), - Gettext => "$RT::LocalLexiconPath/*/$_.po", - Gettext => "$RT::LocalLexiconPath/$_.po", - ], - } @lang - }); + Locale::Maketext::Lexicon->import({ _decode => 1, %import }); return 1; } +sub LoadLexicons { + + no strict 'refs'; + foreach my $k (keys %{RT::I18N::} ) { + next if $k eq 'main::'; + next unless index($k, '::', -2) >= 0; + next unless exists ${ 'RT::I18N::'. $k }{'Lexicon'}; + + my $lex = *{ ${'RT::I18N::'. $k }{'Lexicon'} }{HASH}; + # run fetch to force load + my $tmp = $lex->{'foo'}; + # XXX: untie may fail with "untie attempted + # while 1 inner references still exist" + # TODO: untie that has to lower fetch impact + # untie %$lex if tied %$lex; + } +} + =head2 encoding Returns the encoding of the current lexicon, as yanked out of __ContentType's "charset" field. If it can't find anything, it returns 'ISO-8859-1' -=begin testing - -ok(my $chinese = RT::I18N->get_handle('zh_tw')); -ok(UNIVERSAL::can($chinese, 'maketext')); -ok($chinese->maketext('__Content-Type') =~ /utf-8/i, "Found the utf-8 charset for traditional chinese in the string ".$chinese->maketext('__Content-Type')); -ok($chinese->encoding eq 'utf-8', "The encoding is 'utf-8' -".$chinese->encoding); - -ok(my $en = RT::I18N->get_handle('en')); -ok(UNIVERSAL::can($en, 'maketext')); -ok($en->encoding eq 'utf-8', "The encoding ".$en->encoding." is 'utf-8'"); - -=end testing =cut @@ -194,6 +203,11 @@ charset encoding (encoded as octets, *not* unicode-strings). It will iterate all the entities in $entity, and try to convert each one into specified charset if whose Content-Type is 'text/plain'. +the methods are tries in order: +1) to convert the entity to $encoding, +2) to interpret the entity as iso-8859-1 and then convert it to $encoding, +3) forcibly convert it to $encoding. + This function doesn't return anything meaningful. =cut @@ -225,37 +239,51 @@ sub SetMIMEEntityToEncoding { } # If this is a textual entity, we'd need to preserve its original encoding - $head->add( "X-RT-Original-Encoding" => $charset ) + $head->replace( "X-RT-Original-Encoding" => $charset ) if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type); return unless IsTextualContentType($head->mime_type); my $body = $entity->bodyhandle; - if ( $enc ne $charset && $body) { - my @lines = $body->as_lines or return; - - # {{{ Convert the body - eval { - $RT::Logger->debug("Converting '$charset' to '$enc' for ". $head->mime_type . " - ". ($head->get('subject') || 'Subjectless message')); - - # NOTE:: see the comments at the end of the sub. - Encode::_utf8_off( $lines[$_] ) foreach ( 0 .. $#lines ); - Encode::from_to( $lines[$_], $charset => $enc ) for ( 0 .. $#lines ); - }; - - if ($@) { - $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" ); - eval { - Encode::from_to( $lines[$_], 'iso-8859-1' => $enc ) foreach ( 0 .. $#lines ); - }; - if ($@) { - $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" ); - } - } - # }}} + if ( $enc ne $charset && $body ) { + my $string = $body->as_string or return; + # NOTE:: see the comments at the end of the sub. + Encode::_utf8_off($string); + my $orig_string = $string; + + # {{{ Convert the body + eval { + $RT::Logger->debug( "Converting '$charset' to '$enc' for " + . $head->mime_type . " - " + . ( $head->get('subject') || 'Subjectless message' ) ); + Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK ); + }; + + if ($@) { + $RT::Logger->error( "Encoding error: " + . $@ + . " falling back to iso-8859-1 => $enc" ); + $string = $orig_string; + eval { + Encode::from_to( + $string, + 'iso-8859-1' => $enc, + Encode::FB_CROAK + ); + }; + if ($@) { + $RT::Logger->error( "Encoding error: " + . $@ + . " forcing conversion to $charset => $enc" ); + $string = $orig_string; + Encode::from_to( $string, $charset => $enc ); + } + } - my $new_body = MIME::Body::InCore->new( \@lines ); + # }}} + + my $new_body = MIME::Body::InCore->new($string); # set up the new entity $head->mime_attr( "content-type" => 'text/plain' ) @@ -296,24 +324,26 @@ tried. Maybe it's ok now. sub DecodeMIMEWordsToUTF8 { my $str = shift; - DecodeMIMEWordsToEncoding($str, 'utf-8'); + return DecodeMIMEWordsToEncoding($str, 'utf-8', @_); } sub DecodeMIMEWordsToEncoding { my $str = shift; - my $enc = shift; + my $to_charset = shift; + my $field = shift || ''; - @_ = $str =~ m/(.*?)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/gcs; - return ($str) unless (@_); + my @list = $str =~ m/(.*?)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/gcs; + return ($str) unless (@list); # add everything that hasn't matched to the end of the latest # string in array this happen when we have 'key="=?encoded?="; key="plain"' - $_[-1] .= substr($str, pos $str); + $list[-1] .= substr($str, pos $str); $str = ""; - while (@_) { + while (@list) { my ($prefix, $charset, $encoding, $enc_str, $trailing) = - (shift, shift, lc shift, shift, shift); + splice @list, 0, 5; + $encoding = lc $encoding; $trailing =~ s/\s?\t?$//; # Observed from Outlook Express @@ -329,14 +359,16 @@ sub DecodeMIMEWordsToEncoding { ."only Q(uoted-printable) and B(ase64) are supported"); } - # now we have got a decoded subject, try to convert into the encoding - unless ($charset eq $enc) { - eval { Encode::from_to($enc_str, $charset, $enc) }; - if ($@) { - $charset = _GuessCharset( $enc_str ); - Encode::from_to($enc_str, $charset, $enc); - } - } + # now we have got a decoded subject, try to convert into the encoding + unless ( $charset eq $to_charset ) { + my $orig_str = $enc_str; + eval { Encode::from_to( $enc_str, $charset, $to_charset, Encode::FB_CROAK ) }; + if ($@) { + $enc_str = $orig_str; + $charset = _GuessCharset( $enc_str ); + Encode::from_to( $enc_str, $charset, $to_charset ); + } + } # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers # We _should_ be preserving them encoded until after parsing is completed and @@ -351,7 +383,14 @@ sub DecodeMIMEWordsToEncoding { # until this is fixed, we must escape any string containing a comma or semicolon # this is only a bandaid - $enc_str = qq{"$enc_str"} if ($enc_str =~ /[,;]/); + # Some _other_ MUAs encode quotes _already_, and double quotes + # confuse us a lot, so only quote it if it isn't quoted + # already. + $enc_str = qq{"$enc_str"} + if $enc_str =~ /[,;]/ + and $enc_str !~ /^".*"$/ + and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i); + $str .= $prefix . $enc_str . $trailing; } @@ -405,10 +444,14 @@ use Encode::Guess to try to figure it out the string's encoding. sub _GuessCharset { my $fallback = 'iso-8859-1'; - my $charset; - if ( @RT::EmailInputEncodings and eval { require Encode::Guess; 1 } ) { - Encode::Guess->set_suspects(@RT::EmailInputEncodings); + # if $_[0] is null/empty, we don't guess its encoding + return $fallback unless defined $_[0] && length $_[0]; + + my $charset; + my @encodings = RT->Config->Get('EmailInputEncodings'); + if ( @encodings and eval { require Encode::Guess; 1 } ) { + Encode::Guess->set_suspects( @encodings ); my $decoder = Encode::Guess->guess( $_[0] ); if ( defined($decoder) ) { @@ -421,7 +464,7 @@ sub _GuessCharset { my %matched = map { $_ => 1 } split(/ or /, $1); return 'utf-8' if $matched{'utf8'}; # one and only normalization - foreach my $suspect (@RT::EmailInputEncodings) { + foreach my $suspect (RT->Config->Get('EmailInputEncodings')) { next unless $matched{$suspect}; $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect"); $charset = $suspect; @@ -436,11 +479,13 @@ sub _GuessCharset { $RT::Logger->warning("Encode::Guess failed: decoder is undefined; fallback to $fallback"); } } - else { - $RT::Logger->warning("Cannot Encode::Guess; fallback to $fallback"); + elsif ( @encodings && $@ ) { + $RT::Logger->error("You have set EmailInputEncodings, but we couldn't load Encode::Guess: $@"); + } else { + $RT::Logger->warning("No EmailInputEncodings set, fallback to $fallback"); } - return($charset || $fallback); + return ($charset || $fallback); } # }}} @@ -469,22 +514,35 @@ sub SetMIMEHeadToEncoding { my @values = $head->get_all($tag); $head->delete($tag); foreach my $value (@values) { + Encode::_utf8_off($value); + my $orig_value = $value; if ( $charset ne $enc ) { - eval { - Encode::_utf8_off($value); - Encode::from_to( $value, $charset => $enc ); + Encode::from_to( $value, $charset => $enc, Encode::FB_CROAK ); }; if ($@) { - $RT::Logger->error( "Encoding error: " . $@ - . " defaulting to ISO-8859-1 -> UTF-8" ); - eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) }; + $RT::Logger->error( "Encoding error: " + . $@ + . " falling back to iso-8859-1 => $enc" ); + $value = $orig_value; + eval { + Encode::from_to( + $value, + 'iso-8859-1' => $enc, + Encode::FB_CROAK + ); + }; if ($@) { - $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" ); + $RT::Logger->error( "Encoding error: " + . $@ + . " forcing conversion to $charset => $enc" ); + $value = $orig_value; + Encode::from_to( $value, $charset => $enc ); } } } - $value = DecodeMIMEWordsToEncoding( $value, $enc ) unless $preserve_words; + $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag ) + unless $preserve_words; $head->add( $tag, $value ); } }