3 # Copyright (c) 1996-2003 Jesse Vincent <jesse@bestpractical.com>
5 # (Except where explictly superceded by other copyright notices)
7 # This work is made available to you under the terms of Version 2 of
8 # the GNU General Public License. A copy of that license should have
9 # been provided with this software, but in any event can be snarfed
12 # This work is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # General Public License for more details.
17 # Unless otherwise specified, all modifications, corrections or
18 # extensions to this work which alter its source code become the
19 # property of Best Practical Solutions, LLC when submitted for
20 # inclusion in the work.
26 RT::I18N - a base class for localization of RT
33 use Locale::Maketext 1.04;
34 use Locale::Maketext::Lexicon 0.25;
35 use base ('Locale::Maketext::Fuzzy');
36 use vars qw( %Lexicon );
38 #If we're running on 5.6, we desperately need Encode::compat. But if we're on 5.8, we don't really need it.
39 BEGIN { if ($] < 5.007001) {
40 require Encode::compat;
47 # I decree that this project's first language is English.
50 'TEST_STRING' => 'Concrete Mixer',
52 '__Content-Type' => 'text/plain; charset=utf-8',
55 # That means that lookup failures can't happen -- if we get as far
56 # as looking for something in this lexicon, and we don't find it,
57 # then automagically set $Lexicon{$key} = $key, before possibly
60 # The exception is keys that start with "_" -- they aren't auto-makeable.
67 Initializes the lexicons used for localization.
79 # Load language-specific functions
80 foreach my $language ( glob(substr(__FILE__, 0, -3) . "/*.pm")) {
81 if ($language =~ /^([-\w.\/\\~:]+)$/) {
85 warn("$language is tainted. not loading");
89 my @lang = @RT::LexiconLanguages;
90 @lang = ('*') unless @lang;
92 # Acquire all .po files and iterate them into lexicons
93 Locale::Maketext::Lexicon->import({
96 Gettext => (substr(__FILE__, 0, -3) . "/$_.po"),
97 Gettext => "$RT::LocalLexiconPath/*/$_.po",
107 Returns the encoding of the current lexicon, as yanked out of __ContentType's "charset" field.
108 If it can't find anything, it returns 'ISO-8859-1'
112 ok(my $chinese = RT::I18N->get_handle('zh_tw'));
113 ok(UNIVERSAL::can($chinese, 'maketext'));
114 ok($chinese->maketext('__Content-Type') =~ /utf-8/i, "Found the utf-8 charset for traditional chinese in the string ".$chinese->maketext('__Content-Type'));
115 ok($chinese->encoding eq 'utf-8', "The encoding is 'utf-8' -".$chinese->encoding);
117 ok(my $en = RT::I18N->get_handle('en'));
118 ok(UNIVERSAL::can($en, 'maketext'));
119 ok($en->encoding eq 'utf-8', "The encoding ".$en->encoding." is 'utf-8'");
127 sub encoding { 'utf-8' }
129 # {{{ SetMIMEEntityToUTF8
131 =head2 SetMIMEEntityToUTF8 $entity
133 An utility method which will try to convert entity body into utf8.
134 It's now a wrap-up of SetMIMEEntityToEncoding($entity, 'utf-8').
138 sub SetMIMEEntityToUTF8 {
139 RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
144 # {{{ SetMIMEEntityToEncoding
146 =head2 SetMIMEEntityToEncoding $entity, $encoding
148 An utility method which will try to convert entity body into specified
149 charset encoding (encoded as octets, *not* unicode-strings). It will
150 iterate all the entities in $entity, and try to convert each one into
151 specified charset if whose Content-Type is 'text/plain'.
153 This method doesn't return anything meaningful.
157 sub SetMIMEEntityToEncoding {
158 my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
160 #if ( $entity->is_multipart ) {
161 #$RT::Logger->crit("This entity is a multipart " . $entity->head->as_string);
162 SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
165 my $charset = _FindOrGuessCharset($entity) or return;
166 # one and only normalization
167 $charset = 'utf-8' if $charset =~ /^utf-?8$/i;
168 $enc = 'utf-8' if $enc =~ /^utf-?8$/i;
170 SetMIMEHeadToEncoding(
172 _FindOrGuessCharset($entity, 1) => $enc,
176 my $head = $entity->head;
178 # convert at least MIME word encoded attachment filename
179 foreach my $attr (qw(content-type.name content-disposition.filename)) {
180 if ( my $name = $head->mime_attr($attr) and !$preserve_words ) {
181 $head->mime_attr( $attr => DecodeMIMEWordsToUTF8($name) );
185 # If this is a textual entity, we'd need to preserve its original encoding
186 $head->add( "X-RT-Original-Encoding" => $charset )
187 if $head->mime_attr('content-type.charset') or $head->mime_type =~ /^text/;
190 return unless ( $head->mime_type =~ qr{^(text/plain|message/rfc822)$}i );
193 my $body = $entity->bodyhandle;
195 if ( $enc ne $charset && $body) {
196 my @lines = $body->as_lines or return;
198 # {{{ Convert the body
200 $RT::Logger->debug("Converting '$charset' to '$enc' for ". $head->mime_type . " - ". $head->get('subject'));
202 # NOTE:: see the comments at the end of the sub.
203 Encode::_utf8_off( $lines[$_] ) foreach ( 0 .. $#lines );
204 Encode::from_to( $lines[$_], $charset => $enc ) for ( 0 .. $#lines );
208 $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" );
210 Encode::from_to( $lines[$_], 'iso-8859-1' => $enc ) foreach ( 0 .. $#lines );
213 $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
218 my $new_body = MIME::Body::InCore->new( \@lines );
220 # set up the new entity
221 $head->mime_attr( "content-type" => 'text/plain' )
222 unless ( $head->mime_attr("content-type") );
223 $head->mime_attr( "content-type.charset" => $enc );
224 $entity->bodyhandle($new_body);
228 # NOTES: Why Encode::_utf8_off before Encode::from_to
230 # All the strings in RT are utf-8 now. Quotes from Encode POD:
232 # [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
233 # ... The data in $octets must be encoded as octets and not as
234 # characters in Perl's internal format. ...
236 # Not turning off the UTF-8 flag in the string will prevent the string
241 # {{{ DecodeMIMEWordsToUTF8
243 =head2 DecodeMIMEWordsToUTF8 $raw
245 An utility method which mimics MIME::Words::decode_mimewords, but only
246 limited functionality. This function returns an utf-8 string.
248 It returns the decoded string, or the original string if it's not
249 encoded. Since the subroutine converts specified string into utf-8
250 charset, it should not alter a subject written in English.
252 Why not use MIME::Words directly? Because it fails in RT when I
253 tried. Maybe it's ok now.
257 sub DecodeMIMEWordsToUTF8 {
259 DecodeMIMEWordsToEncoding($str, 'utf-8');
262 sub DecodeMIMEWordsToEncoding {
267 @_ = $str =~ m/([^=]*)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/g;
269 return ($str) unless (@_);
273 my ($prefix, $charset, $encoding, $enc_str, $trailing) =
274 (shift, shift, shift, shift, shift);
276 $trailing =~ s/\s?\t?$//; # Observed from Outlook Express
278 if ($encoding eq 'Q' or $encoding eq 'q') {
279 use MIME::QuotedPrint;
280 $enc_str =~ tr/_/ /; # Observed from Outlook Express
281 $enc_str = decode_qp($enc_str);
282 } elsif ($encoding eq 'B' or $encoding eq 'b') {
284 $enc_str = decode_base64($enc_str);
286 $RT::Logger->warning("RT::I18N::DecodeMIMEWordsToCharset got a " .
287 "strange encoding: $encoding.");
290 # now we have got a decoded subject, try to convert into the encoding
291 unless ($charset eq $enc) {
292 eval { Encode::from_to($enc_str, $charset, $enc) };
294 $charset = _GuessCharset( $enc_str );
295 Encode::from_to($enc_str, $charset, $enc);
299 $str .= $prefix . $enc_str . $trailing;
307 # {{{ _FindOrGuessCharset
309 =head2 _FindOrGuessCharset MIME::Entity, $head_only
311 When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that, will use Encode::Guess to try to figure it out
313 If $head_only is true, only guesses charset for head parts. This is because header's encoding (e.g. filename="...") may be different from that of body's.
317 sub _FindOrGuessCharset {
319 my $head_only = shift;
320 my $head = $entity->head;
322 if ($head->mime_attr("content-type.charset")) {
323 return $head->mime_attr("content-type.charset");
326 if ( !$head_only and $head->mime_type =~ m{^text/}) {
327 my $body = $entity->bodyhandle or return;
328 return _GuessCharset( $body->as_string );
331 # potentially binary data -- don't guess the body
332 return _GuessCharset( $head->as_string );
341 =head2 _GuessCharset STRING
343 use Encode::Guess to try to figure it out the string's encoding.
348 my $fallback = 'iso-8859-1';
351 if ( @RT::EmailInputEncodings and eval { require Encode::Guess; 1 } ) {
352 Encode::Guess->set_suspects(@RT::EmailInputEncodings);
353 my $decoder = Encode::Guess->guess( $_[0] );
355 if ( ref $decoder ) {
356 $charset = $decoder->name;
357 $RT::Logger->debug("Guessed encoding: $charset");
360 elsif ($decoder =~ /(\S+ or .+)/) {
361 my %matched = map { $_ => 1 } split(/ or /, $1);
362 return 'utf-8' if $matched{'utf8'}; # one and only normalization
364 foreach my $suspect (@RT::EmailInputEncodings) {
365 next unless $matched{$suspect};
366 $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
372 $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
376 $RT::Logger->warning("Cannot Encode::Guess; fallback to $fallback");
379 return($charset || $fallback);
384 # {{{ SetMIMEHeadToEncoding
386 =head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
388 Converts a MIME Head from one encoding to another. This totally violates the RFC.
389 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
395 sub SetMIMEHeadToEncoding {
396 my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
398 $charset = 'utf-8' if $charset eq 'utf8';
399 $enc = 'utf-8' if $enc eq 'utf8';
401 return if $charset eq $enc and $preserve_words;
403 foreach my $tag ( $head->tags ) {
404 my @values = $head->get_all($tag);
406 foreach my $value (@values) {
407 if ( $charset ne $enc ) {
410 Encode::_utf8_off($value);
411 Encode::from_to( $value, $charset => $enc );
414 $RT::Logger->error( "Encoding error: " . $@
415 . " defaulting to ISO-8859-1 -> UTF-8" );
416 eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) };
418 $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
422 $value = DecodeMIMEWordsToEncoding( $value, $enc ) unless $preserve_words;
423 $head->add( $tag, $value );
430 eval "require RT::I18N_Vendor";
431 die $@ if ($@ && $@ !~ qr{^Can't locate RT/I18N_Vendor.pm});
432 eval "require RT::I18N_Local";
433 die $@ if ($@ && $@ !~ qr{^Can't locate RT/I18N_Local.pm});