3 # Copyright (c) 1996-2003 Jesse Vincent <jesse@bestpractical.com>
5 # (Except where explictly superceded by other copyright notices)
7 # This work is made available to you under the terms of Version 2 of
8 # the GNU General Public License. A copy of that license should have
9 # been provided with this software, but in any event can be snarfed
12 # This work is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # General Public License for more details.
17 # Unless otherwise specified, all modifications, corrections or
18 # extensions to this work which alter its source code become the
19 # property of Best Practical Solutions, LLC when submitted for
20 # inclusion in the work.
26 RT::I18N - a base class for localization of RT
33 use Locale::Maketext 1.04;
34 use Locale::Maketext::Lexicon 0.25;
35 use base ('Locale::Maketext::Fuzzy');
36 use vars qw( %Lexicon );
38 #If we're running on 5.6, we desperately need Encode::compat. But if we're on 5.8, we don't really need it.
39 BEGIN { if ($] < 5.007001) {
40 require Encode::compat;
47 # I decree that this project's first language is English.
50 'TEST_STRING' => 'Concrete Mixer',
52 '__Content-Type' => 'text/plain; charset=utf-8',
55 # That means that lookup failures can't happen -- if we get as far
56 # as looking for something in this lexicon, and we don't find it,
57 # then automagically set $Lexicon{$key} = $key, before possibly
60 # The exception is keys that start with "_" -- they aren't auto-makeable.
67 Initializes the lexicons used for localization.
79 # Load language-specific functions
80 foreach my $language ( glob(substr(__FILE__, 0, -3) . "/*.pm")) {
81 if ($language =~ /^([-\w.\/\\~:]+)$/) {
85 warn("$language is tainted. not loading");
89 my @lang = @RT::LexiconLanguages;
90 @lang = ('*') unless @lang;
92 # Acquire all .po files and iterate them into lexicons
93 Locale::Maketext::Lexicon->import({
96 Gettext => (substr(__FILE__, 0, -3) . "/$_.po"),
97 Gettext => "$RT::LocalLexiconPath/*/$_.po",
107 Returns the encoding of the current lexicon, as yanked out of __ContentType's "charset" field.
108 If it can't find anything, it returns 'ISO-8859-1'
112 ok(my $chinese = RT::I18N->get_handle('zh_tw'));
113 ok(UNIVERSAL::can($chinese, 'maketext'));
114 ok($chinese->maketext('__Content-Type') =~ /utf-8/i, "Found the utf-8 charset for traditional chinese in the string ".$chinese->maketext('__Content-Type'));
115 ok($chinese->encoding eq 'utf-8', "The encoding is 'utf-8' -".$chinese->encoding);
117 ok(my $en = RT::I18N->get_handle('en'));
118 ok(UNIVERSAL::can($en, 'maketext'));
119 ok($en->encoding eq 'utf-8', "The encoding ".$en->encoding." is 'utf-8'");
127 sub encoding { 'utf-8' }
129 # {{{ SetMIMEEntityToUTF8
131 =head2 SetMIMEEntityToUTF8 $entity
133 An utility method which will try to convert entity body into utf8.
134 It's now a wrap-up of SetMIMEEntityToEncoding($entity, 'utf-8').
138 sub SetMIMEEntityToUTF8 {
139 RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
144 # {{{ SetMIMEEntityToEncoding
146 =head2 SetMIMEEntityToEncoding $entity, $encoding
148 An utility method which will try to convert entity body into specified
149 charset encoding (encoded as octets, *not* unicode-strings). It will
150 iterate all the entities in $entity, and try to convert each one into
151 specified charset if whose Content-Type is 'text/plain'.
153 This method doesn't return anything meaningful.
157 sub SetMIMEEntityToEncoding {
158 my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
160 #if ( $entity->is_multipart ) {
161 #$RT::Logger->crit("This entity is a multipart " . $entity->head->as_string);
162 SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
165 my $charset = _FindOrGuessCharset($entity) or return;
166 # one and only normalization
167 $charset = 'utf-8' if $charset eq 'utf8';
168 $enc = 'utf-8' if $enc eq 'utf8';
170 SetMIMEHeadToEncoding($entity->head, $charset => $enc, $preserve_words);
172 my $head = $entity->head;
174 # convert at least MIME word encoded attachment filename
175 foreach my $attr (qw(content-type.name content-disposition.filename)) {
176 if ( my $name = $head->mime_attr($attr) and !$preserve_words ) {
177 $head->mime_attr( $attr => DecodeMIMEWordsToUTF8($name) );
181 # If this is a textual entity, we'd need to preserve its original encoding
182 $head->add( "X-RT-Original-Encoding" => $charset )
183 if $head->mime_attr('content-type.charset') or $head->mime_type =~ /^text/;
186 return unless ( $head->mime_type =~ qr{^(text/plain|message/rfc822)$}i );
189 my $body = $entity->bodyhandle;
191 if ( $enc ne $charset && $body) {
192 my @lines = $body->as_lines or return;
194 # {{{ Convert the body
196 $RT::Logger->debug("Converting '$charset' to '$enc' for ". $head->mime_type . " - ". $head->get('subject'));
198 # NOTE:: see the comments at the end of the sub.
199 Encode::_utf8_off( $lines[$_] ) foreach ( 0 .. $#lines );
200 Encode::from_to( $lines[$_], $charset => $enc ) for ( 0 .. $#lines );
204 $RT::Logger->error( "Encoding error: " . $@ . " defaulting to ISO-8859-1 -> UTF-8" );
206 Encode::from_to( $lines[$_], 'iso-8859-1' => $enc ) foreach ( 0 .. $#lines );
209 $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
214 my $new_body = MIME::Body::InCore->new( \@lines );
216 # set up the new entity
217 $head->mime_attr( "content-type" => 'text/plain' )
218 unless ( $head->mime_attr("content-type") );
219 $head->mime_attr( "content-type.charset" => $enc );
220 $entity->bodyhandle($new_body);
224 # NOTES: Why Encode::_utf8_off before Encode::from_to
226 # All the strings in RT are utf-8 now. Quotes from Encode POD:
228 # [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
229 # ... The data in $octets must be encoded as octets and not as
230 # characters in Perl's internal format. ...
232 # Not turning off the UTF-8 flag in the string will prevent the string
237 # {{{ DecodeMIMEWordsToUTF8
239 =head2 DecodeMIMEWordsToUTF8 $raw
241 An utility method which mimics MIME::Words::decode_mimewords, but only
242 limited functionality. This function returns an utf-8 string.
244 It returns the decoded string, or the original string if it's not
245 encoded. Since the subroutine converts specified string into utf-8
246 charset, it should not alter a subject written in English.
248 Why not use MIME::Words directly? Because it fails in RT when I
249 tried. Maybe it's ok now.
253 sub DecodeMIMEWordsToUTF8 {
255 DecodeMIMEWordsToEncoding($str, 'utf-8');
258 sub DecodeMIMEWordsToEncoding {
263 @_ = $str =~ m/([^=]*)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/g;
265 return ($str) unless (@_);
269 my ($prefix, $charset, $encoding, $enc_str, $trailing) =
270 (shift, shift, shift, shift, shift);
272 $trailing =~ s/\s?\t?$//; # Observed from Outlook Express
274 if ($encoding eq 'Q' or $encoding eq 'q') {
275 use MIME::QuotedPrint;
276 $enc_str =~ tr/_/ /; # Observed from Outlook Express
277 $enc_str = decode_qp($enc_str);
278 } elsif ($encoding eq 'B' or $encoding eq 'b') {
280 $enc_str = decode_base64($enc_str);
282 $RT::Logger->warning("RT::I18N::DecodeMIMEWordsToCharset got a " .
283 "strange encoding: $encoding.");
286 # now we have got a decoded subject, try to convert into the encoding
287 unless ($charset eq $enc) {
288 eval { Encode::from_to($enc_str, $charset, $enc) };
290 $charset = _GuessCharset( $enc_str );
291 Encode::from_to($enc_str, $charset, $enc);
295 $str .= $prefix . $enc_str . $trailing;
303 # {{{ _FindOrGuessCharset
305 =head2 _FindOrGuessCharset MIME::Entity
307 When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that,
308 will use Encode::Guess to try to figure it out
312 sub _FindOrGuessCharset {
314 my $head = $entity->head;
316 if ($head->mime_attr("content-type.charset")) {
317 return $head->mime_attr("content-type.charset");
320 if ( $head->mime_type =~ m{^text/}) {
321 my $body = $entity->bodyhandle or return;
322 return _GuessCharset( $head->as_string . $body->as_string );
325 # potentially binary data -- don't guess the body
326 return _GuessCharset( $head->as_string );
335 =head2 _GuessCharset STRING
337 use Encode::Guess to try to figure it out the string's encoding.
342 my $fallback = 'iso-8859-1';
345 if ( @RT::EmailInputEncodings and eval { require Encode::Guess; 1 } ) {
346 Encode::Guess->set_suspects(@RT::EmailInputEncodings);
347 my $decoder = Encode::Guess->guess( $_[0] );
349 if ( ref $decoder ) {
350 $charset = $decoder->name;
351 $RT::Logger->debug("Guessed encoding: $charset");
354 elsif ($decoder =~ /(\S+ or .+)/) {
355 my %matched = map { $_ => 1 } split(/ or /, $1);
356 return 'utf-8' if $matched{'utf8'}; # one and only normalization
358 foreach my $suspect (@RT::EmailInputEncodings) {
359 next unless $matched{$suspect};
360 $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
366 $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
370 $RT::Logger->warning("Cannot Encode::Guess; fallback to $fallback");
373 return($charset || $fallback);
378 # {{{ SetMIMEHeadToEncoding
380 =head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
382 Converts a MIME Head from one encoding to another. This totally violates the RFC.
383 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
389 sub SetMIMEHeadToEncoding {
390 my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
392 $charset = 'utf-8' if $charset eq 'utf8';
393 $enc = 'utf-8' if $enc eq 'utf8';
395 return if $charset eq $enc and $preserve_words;
397 foreach my $tag ( $head->tags ) {
398 my @values = $head->get_all($tag);
400 foreach my $value (@values) {
401 if ( $charset ne $enc ) {
404 Encode::_utf8_off($value);
405 Encode::from_to( $value, $charset => $enc );
408 $RT::Logger->error( "Encoding error: " . $@
409 . " defaulting to ISO-8859-1 -> UTF-8" );
410 eval { Encode::from_to( $value, 'iso-8859-1' => $enc ) };
412 $RT::Logger->crit( "Totally failed to convert to utf-8: " . $@ . " I give up" );
416 $value = DecodeMIMEWordsToEncoding( $value, $enc ) unless $preserve_words;
417 $head->add( $tag, $value );
424 eval "require RT::I18N_Vendor";
425 die $@ if ($@ && $@ !~ qr{^Can't locate RT/I18N_Vendor.pm});
426 eval "require RT::I18N_Local";
427 die $@ if ($@ && $@ !~ qr{^Can't locate RT/I18N_Local.pm});