1 # BEGIN BPS TAGGED BLOCK {{{
5 # This software is Copyright (c) 1996-2005 Best Practical Solutions, LLC
6 # <jesse@bestpractical.com>
8 # (Except where explicitly superseded by other copyright notices)
13 # This work is made available to you under the terms of Version 2 of
14 # the GNU General Public License. A copy of that license should have
15 # been provided with this software, but in any event can be snarfed
18 # This work is distributed in the hope that it will be useful, but
19 # WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # General Public License for more details.
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 # CONTRIBUTION SUBMISSION POLICY:
30 # (The following paragraph is not intended to limit the rights granted
31 # to you to modify and distribute this software under the terms of
32 # the GNU General Public License and is only of importance to you if
33 # you choose to contribute your changes and enhancements to the
34 # community by submitting them to Best Practical Solutions, LLC.)
36 # By intentionally submitting any modifications, corrections or
37 # derivatives to this work, or any other work intended for use with
38 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
39 # you are the copyright holder for those contributions and you grant
40 # Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
41 # royalty-free, perpetual, license to use, copy, create derivative
42 # works based on those contributions, and sublicense and distribute
43 # those contributions and any derivatives thereof.
45 # END BPS TAGGED BLOCK }}}
49 RT::I18N - a base class for localization of RT
56 use Locale::Maketext 1.04;
57 use Locale::Maketext::Lexicon 0.25;
58 use base ('Locale::Maketext::Fuzzy');
59 use vars qw( %Lexicon );
61 #If we're running on 5.6, we desperately need Encode::compat. But if we're on 5.8, we don't really need it.
# $] is the numeric version of the running perl, so the test below is true
# for anything older than 5.7.1. Because this sits in a BEGIN block, the
# require is executed at compile time.
62 BEGIN { if ($] < 5.007001) {
63 require Encode::compat;
70 # I decree that this project's first language is English.
# NOTE(review): presumably a sample key for smoke-testing translations -- confirm.
73 'TEST_STRING' => 'Concrete Mixer',
# Content type / charset advertised by this lexicon. The embedded tests in
# this file look the key up via maketext('__Content-Type') and expect the
# value to mention utf-8.
75 '__Content-Type' => 'text/plain; charset=utf-8',
78 # That means that lookup failures can't happen -- if we get as far
79 # as looking for something in this lexicon, and we don't find it,
80 # then automagically set $Lexicon{$key} = $key, before possibly
83 # The exception is keys that start with "_" -- they aren't auto-makeable.
90 Initializes the lexicons used for localization.
104 # Load language-specific functions
# Candidate modules are every *.pm file in the directory whose name is this
# file's path minus its last three characters (presumably the ".pm" suffix).
105 foreach my $language ( File::Glob::bsd_glob(substr(__FILE__, 0, -3) . "/*.pm")) {
# Accept only paths made up entirely of word characters, whitespace and
# - . / \ ~ : ; the whole path is captured into $1. Anything else is
# reported by the warn() below.
106 if ($language =~ /^([-\w\s.\/\\~:]+)$/) {
110 warn("$language is tainted. not loading");
# Languages to build lexicons for: the configured list, or '*' when the
# configured list is empty.
114 my @lang = @RT::LexiconLanguages;
115 @lang = ('*') unless @lang;
117 # Acquire all .po files and iterate them into lexicons
118 Locale::Maketext::Lexicon->import({
# Three Gettext sources are listed, each parameterised by $_ (presumably the
# current entry of @lang -- confirm): the directory beside this file, then
# one-level subdirectories of $RT::LocalLexiconPath, then
# $RT::LocalLexiconPath itself.
121 Gettext => (substr(__FILE__, 0, -3) . "/$_.po"),
122 Gettext => "$RT::LocalLexiconPath/*/$_.po",
123 Gettext => "$RT::LocalLexiconPath/$_.po",
133 Returns the encoding of the current lexicon. This was documented as being yanked out of __Content-Type's "charset" field,
134 with 'ISO-8859-1' returned if nothing is found; the encoding() sub in this file, however, always returns 'utf-8'.
138 ok(my $chinese = RT::I18N->get_handle('zh_tw'));
139 ok(UNIVERSAL::can($chinese, 'maketext'));
140 ok($chinese->maketext('__Content-Type') =~ /utf-8/i, "Found the utf-8 charset for traditional chinese in the string ".$chinese->maketext('__Content-Type'));
141 ok($chinese->encoding eq 'utf-8', "The encoding is 'utf-8' -".$chinese->encoding);
143 ok(my $en = RT::I18N->get_handle('en'));
144 ok(UNIVERSAL::can($en, 'maketext'));
145 ok($en->encoding eq 'utf-8', "The encoding ".$en->encoding." is 'utf-8'");
# Report the character encoding of the lexicon. The invocant (class or
# handle) is ignored; the answer is always the literal string 'utf-8'.
sub encoding {
    return 'utf-8';
}
155 # {{{ SetMIMEEntityToUTF8
157 =head2 SetMIMEEntityToUTF8 $entity
159 A utility method which will try to convert the entity body into utf-8.
160 It is now a thin wrapper around SetMIMEEntityToEncoding($entity, 'utf-8').
# Convenience wrapper: passes its first argument (the MIME entity) to
# SetMIMEEntityToEncoding with the target encoding fixed to 'utf-8'.
# Only the first argument is forwarded; any others are ignored.
164 sub SetMIMEEntityToUTF8 {
165 RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
170 # {{{ SetMIMEEntityToEncoding
172 =head2 SetMIMEEntityToEncoding $entity, $encoding
174 A utility method which will try to convert the entity body into the specified
175 charset encoding (encoded as octets, *not* unicode-strings). It will
176 iterate over all the entities in $entity, and try to convert each one whose
177 Content-Type is 'text/plain' into the specified charset.
179 This method doesn't return anything meaningful.
# Convert a MIME entity (and, recursively, its parts) to the encoding $enc.
#
# Arguments, in order:
#   $entity          - object providing parts, head and bodyhandle
#   $enc             - target encoding name
#   $preserve_words  - when true, MIME-word encoded attachment names are left
#                      alone, and the flag is passed on to the header conversion
#
# The head is converted for every entity. The body is converted only when the
# MIME type is text/plain or message/rfc822.
183 sub SetMIMEEntityToEncoding {
184 my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
# The is_multipart guard is commented out, so the recursion over
# $entity->parts runs for every entity.
186 #if ( $entity->is_multipart ) {
187 #$RT::Logger->crit("This entity is a multipart " . $entity->head->as_string);
188 SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
# Give up on this entity when no charset can be found or guessed.
191 my $charset = _FindOrGuessCharset($entity) or return;
192 # one and only normalization
# Any spelling of utf8 / utf-8 (case-insensitive) becomes the literal
# 'utf-8', so the string comparison of $enc and $charset further down works.
193 $charset = 'utf-8' if $charset =~ /^utf-?8$/i;
194 $enc = 'utf-8' if $enc =~ /^utf-?8$/i;
196 SetMIMEHeadToEncoding(
# The source charset for the head is looked up separately with the
# head-only flag set, so it may differ from $charset used for the body.
198 _FindOrGuessCharset($entity, 1) => $enc,
202 my $head = $entity->head;
204 # convert at least MIME word encoded attachment filename
# NOTE(review): the name is always decoded with DecodeMIMEWordsToUTF8, even
# when $enc is not utf-8 -- confirm this is intended.
205 foreach my $attr (qw(content-type.name content-disposition.filename)) {
206 if ( my $name = $head->mime_attr($attr) and !$preserve_words ) {
207 $head->mime_attr( $attr => DecodeMIMEWordsToUTF8($name) );
211 # If this is a textual entity, we'd need to preserve its original encoding
# The header is added when the entity declares a charset or has a text/* type.
212 $head->add( "X-RT-Original-Encoding" => $charset )
213 if $head->mime_attr('content-type.charset') or $head->mime_type =~ /^text/;
# Everything below deals with the body; other MIME types stop here.
216 return unless ( $head->mime_type =~ qr{^(text/plain|message/rfc822)$}i );
219 my $body = $entity->bodyhandle;
# Nothing to convert when source and target encodings already agree or there
# is no body handle.
221 if ( $enc ne $charset && $body) {
# Also returns when the body yields no lines.
222 my @lines = $body->as_lines or return;
224 # {{{ Convert the body
226 $RT::Logger->debug("Converting '$charset' to '$enc' for ". $head->mime_type . " - ". ($head->get('subject') || 'Subjectless message'));
228 # NOTE:: see the comments at the end of the sub.
# Each line is converted in place: first the UTF-8 flag is cleared so the
# data is treated as octets, then from_to() transcodes it.
229 Encode::_utf8_off( $lines[$_] ) foreach ( 0 .. $#lines );
230 Encode::from_to( $lines[$_], $charset => $enc ) for ( 0 .. $#lines );
# Fallback path: retry assuming the source was iso-8859-1.
# NOTE(review): the log texts hard-code "UTF-8"/"utf-8" although the real
# target is $enc.
234 $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" );
236 Encode::from_to( $lines[$_], 'iso-8859-1' => $enc ) foreach ( 0 .. $#lines );
239 $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
# Build a fresh in-memory body from the converted lines.
244 my $new_body = MIME::Body::InCore->new( \@lines );
246 # set up the new entity
# Default the content type to text/plain when none is set, record the new
# charset, and swap in the converted body.
247 $head->mime_attr( "content-type" => 'text/plain' )
248 unless ( $head->mime_attr("content-type") );
249 $head->mime_attr( "content-type.charset" => $enc );
250 $entity->bodyhandle($new_body);
254 # NOTES: Why Encode::_utf8_off before Encode::from_to
256 # All the strings in RT are utf-8 now. Quotes from Encode POD:
258 # [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
259 # ... The data in $octets must be encoded as octets and not as
260 # characters in Perl's internal format. ...
262 # Not turning off the UTF-8 flag in the string will prevent the string
267 # {{{ DecodeMIMEWordsToUTF8
269 =head2 DecodeMIMEWordsToUTF8 $raw
271 A utility method which mimics MIME::Words::decode_mimewords, but with only
272 limited functionality. This function returns a utf-8 string.
274 It returns the decoded string, or the original string if it's not
275 encoded. Since the subroutine converts specified string into utf-8
276 charset, it should not alter a subject written in English.
278 Why not use MIME::Words directly? Because it fails in RT when I
279 tried. Maybe it's ok now.
# Convenience wrapper: decodes MIME encoded-words in $str by delegating to
# DecodeMIMEWordsToEncoding with the target encoding fixed to 'utf-8'.
283 sub DecodeMIMEWordsToUTF8 {
285 DecodeMIMEWordsToEncoding($str, 'utf-8');
# Decode MIME encoded-words ("=?charset?Q-or-B?payload?=") found in $str and
# convert each decoded payload to the encoding $enc. A string containing no
# encoded-word is returned unchanged.
288 sub DecodeMIMEWordsToEncoding {
# Global match in list context: @_ is overwritten with one 5-tuple per
# encoded-word -- text before it, charset, encoding letter (Q/q/B/b),
# payload, text after it.
293 @_ = $str =~ m/([^=]*)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/g;
# No encoded-word at all: hand the input back untouched.
295 return ($str) unless (@_);
# Take the next 5-tuple off @_.
299 my ($prefix, $charset, $encoding, $enc_str, $trailing) =
300 (shift, shift, shift, shift, shift);
# Drops at most one whitespace character followed by at most one tab from
# the end of the trailing text.
302 $trailing =~ s/\s?\t?$//; # Observed from Outlook Express
304 if ($encoding eq 'Q' or $encoding eq 'q') {
305 use MIME::QuotedPrint;
# Underscores are turned into spaces before quoted-printable decoding.
306 $enc_str =~ tr/_/ /; # Observed from Outlook Express
307 $enc_str = decode_qp($enc_str);
308 } elsif ($encoding eq 'B' or $encoding eq 'b') {
310 $enc_str = decode_base64($enc_str);
# NOTE(review): the message names "DecodeMIMEWordsToCharset", which is not
# the name of this sub. Also, the pattern above only captures Q/q/B/b, so
# this warning looks unreachable -- confirm.
312 $RT::Logger->warning("RT::I18N::DecodeMIMEWordsToCharset got a " .
313 "strange encoding: $encoding.");
316 # now we have got a decoded subject, try to convert into the encoding
# The comparison is a plain string eq, so e.g. 'UTF-8' vs 'utf-8' still goes
# through from_to().
317 unless ($charset eq $enc) {
318 eval { Encode::from_to($enc_str, $charset, $enc) };
# Second attempt using a guessed charset instead of the declared one
# (presumably only reached when the eval above failed -- confirm).
320 $charset = _GuessCharset( $enc_str );
321 Encode::from_to($enc_str, $charset, $enc);
325 # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
326 # We _should_ be preserving them encoded until after parsing is completed and
327 # THEN undo the mime-encoding.
329 # This routine should be translating the existing mimeencoding to utf8 but leaving
332 # It's legal for headers to contain mime-encoded commas and semicolons which
333 # should not be treated as address separators. (Encoding == quoting here)
335 # until this is fixed, we must escape any string containing a comma or semicolon
336 # this is only a bandaid
# Wrap the decoded text in double quotes when it contains ',' or ';'.
338 $enc_str = qq{"$enc_str"} if ($enc_str =~ /[,;]/);
# Append this word, with its surrounding plain text, to the result.
339 $str .= $prefix . $enc_str . $trailing;
347 # {{{ _FindOrGuessCharset
349 =head2 _FindOrGuessCharset MIME::Entity, $head_only
351 When handed a MIME::Entity, this will first attempt to read what charset the message is encoded in. Failing that, it will use Encode::Guess to try to figure it out.
353 If $head_only is true, only guesses the charset for the head parts. This is because the header's encoding (e.g. filename="...") may be different from the body's.
# Work out the charset of $entity.
#
# Order of preference:
#   1. the charset declared in the Content-Type header;
#   2. for text/* entities, unless $head_only is true, a guess based on the body;
#   3. a guess based on the header text.
# Returns the charset name, or nothing when a text/* entity has no body handle.
357 sub _FindOrGuessCharset {
359 my $head_only = shift;
360 my $head = $entity->head;
# A declared charset is returned as is; no normalisation happens here.
362 if ($head->mime_attr("content-type.charset")) {
363 return $head->mime_attr("content-type.charset");
# Only text/* bodies are inspected, and only when the caller did not ask
# for a head-only answer.
366 if ( !$head_only and $head->mime_type =~ m{^text/}) {
367 my $body = $entity->bodyhandle or return;
368 return _GuessCharset( $body->as_string );
371 # potentially binary data -- don't guess the body
372 return _GuessCharset( $head->as_string );
380 =head2 _GuessCharset STRING
382 Uses Encode::Guess to try to figure out the string's encoding.
# Charset reported whenever guessing is unavailable or yields nothing.
387 my $fallback = 'iso-8859-1';
# Guess only when suspect encodings are configured and Encode::Guess loads.
390 if ( @RT::EmailInputEncodings and eval { require Encode::Guess; 1 } ) {
391 Encode::Guess->set_suspects(@RT::EmailInputEncodings);
# $_[0] is the string passed in by the caller.
392 my $decoder = Encode::Guess->guess( $_[0] );
# A reference means a single encoding was identified.
394 if ( ref $decoder ) {
395 $charset = $decoder->name;
396 $RT::Logger->debug("Guessed encoding: $charset");
# Otherwise $decoder is a plain string. One of the form "X or Y [or ...]"
# lists several candidates.
399 elsif ($decoder =~ /(\S+ or .+)/) {
400 my %matched = map { $_ => 1 } split(/ or /, $1);
# utf8 wins over every other candidate.
401 return 'utf-8' if $matched{'utf8'}; # one and only normalization
# Walk the configured suspects in their configured order and skip those
# that are not among the candidates.
403 foreach my $suspect (@RT::EmailInputEncodings) {
404 next unless $matched{$suspect};
405 $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
# Logged when guess() returned some other string.
411 $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
# Logged when no suspects are configured or Encode::Guess could not be loaded.
415 $RT::Logger->warning("Cannot Encode::Guess; fallback to $fallback");
# An unset or empty $charset falls back to iso-8859-1.
418 return($charset || $fallback);
423 # {{{ SetMIMEHeadToEncoding
425 =head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
427 Converts a MIME Head from one encoding to another. This totally violates the RFC.
428 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
# Convert the values of every header in $head from $charset to $enc and,
# unless $preserve_words is true, decode MIME encoded-words in them.
#
# Arguments, in order: $head, $charset (source), $enc (target), $preserve_words.
434 sub SetMIMEHeadToEncoding {
435 my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
# NOTE(review): only the exact lowercase spelling 'utf8' is normalised here,
# whereas SetMIMEEntityToEncoding accepts /^utf-?8$/i -- confirm whether the
# two should agree.
437 $charset = 'utf-8' if $charset eq 'utf8';
438 $enc = 'utf-8' if $enc eq 'utf8';
# Same encoding on both sides and words to be preserved: nothing to do.
440 return if $charset eq $enc and $preserve_words;
442 foreach my $tag ( $head->tags ) {
443 next unless $tag; # seen in wild: headers with no name
444 my @values = $head->get_all($tag);
446 foreach my $value (@values) {
447 if ( $charset ne $enc ) {
# Clear the UTF-8 flag so the value is treated as octets, then transcode it
# in place.
450 Encode::_utf8_off($value);
451 Encode::from_to( $value, $charset => $enc );
# Fallback path: retry assuming the source was iso-8859-1.
# NOTE(review): the log texts hard-code "UTF-8"/"utf-8" although the real
# target is $enc.
454 $RT::Logger->error( "Encoding error: " . $@
455 . " defaulting to ISO-8859-1 -> UTF-8" );
456 eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) };
458 $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
462 $value = DecodeMIMEWordsToEncoding( $value, $enc ) unless $preserve_words;
# NOTE(review): values are put back with add(); confirm the tag's previous
# values are removed beforehand, otherwise the header would be duplicated.
463 $head->add( $tag, $value );
# Optional overlays: RT::I18N_Vendor and RT::I18N_Local are loaded when they
# exist. An overlay that is simply not installed is skipped silently; any
# other load failure (a syntax error in the overlay, a missing dependency of
# the overlay, a false return value) is re-thrown.
foreach my $overlay (qw(RT::I18N_Vendor RT::I18N_Local)) {
    # require() on a string expects a file path, not a package name.
    ( my $file = "$overlay.pm" ) =~ s{::}{/}g;

    # Block eval rather than string eval: nothing here is dynamic code.
    eval { require $file };

    # \Q...\E keeps the '.' of the file name literal, so only the "this very
    # file is missing" message is treated as harmless.
    die $@ if $@ && $@ !~ m{^Can't locate \Q$file\E};
}