1 # BEGIN BPS TAGGED BLOCK {{{
5 # This software is Copyright (c) 1996-2011 Best Practical Solutions, LLC
6 # <sales@bestpractical.com>
8 # (Except where explicitly superseded by other copyright notices)
13 # This work is made available to you under the terms of Version 2 of
14 # the GNU General Public License. A copy of that license should have
15 # been provided with this software, but in any event can be snarfed
18 # This work is distributed in the hope that it will be useful, but
19 # WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # General Public License for more details.
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 # 02110-1301 or visit their web page on the internet at
27 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
30 # CONTRIBUTION SUBMISSION POLICY:
32 # (The following paragraph is not intended to limit the rights granted
33 # to you to modify and distribute this software under the terms of
34 # the GNU General Public License and is only of importance to you if
35 # you choose to contribute your changes and enhancements to the
36 # community by submitting them to Best Practical Solutions, LLC.)
38 # By intentionally submitting any modifications, corrections or
39 # derivatives to this work, or any other work intended for use with
40 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
41 # you are the copyright holder for those contributions and you grant
42 # Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
43 # royalty-free, perpetual, license to use, copy, create derivative
44 # works based on those contributions, and sublicense and distribute
45 # those contributions and any derivatives thereof.
47 # END BPS TAGGED BLOCK }}}
51 RT::I18N - a base class for localization of RT
60 use Locale::Maketext 1.04;
61 use Locale::Maketext::Lexicon 0.25;
62 use base ('Locale::Maketext::Fuzzy');
68 # I decree that this project's first language is English.
71 'TEST_STRING' => 'Concrete Mixer',
73 '__Content-Type' => 'text/plain; charset=utf-8',
76 # That means that lookup failures can't happen -- if we get as far
77 # as looking for something in this lexicon, and we don't find it,
78 # then automagically set $Lexicon{$key} = $key, before possibly
81 # The exception is keys that start with "_" -- they aren't auto-makeable.
88 Initializes the lexicons used for localization.
96 my @lang = RT->Config->Get('LexiconLanguages');
97 @lang = ('*') unless @lang;
99 # load default functions
100 require substr(__FILE__, 0, -3) . '/i_default.pm';
102 # Load language-specific functions
103 foreach my $file ( File::Glob::bsd_glob(substr(__FILE__, 0, -3) . "/*.pm") ) {
104 unless ( $file =~ /^([-\w\s\.\/\\~:]+)$/ ) {
105 warn("$file is tainted. not loading");
110 my ($lang) = ($file =~ /([^\\\/]+?)\.pm$/);
111 next unless grep $_ eq '*' || $_ eq $lang, @lang;
116 foreach my $l ( @lang ) {
118 Gettext => (substr(__FILE__, 0, -3) . "/$l.po"),
119 Gettext => "$RT::LocalLexiconPath/*/$l.po",
120 Gettext => "$RT::LocalLexiconPath/$l.po",
122 push @{ $import{$l} }, map {(Gettext => "$_/$l.po")} RT->PluginDirs('po');
125 # Acquire all .po files and iterate them into lexicons
126 Locale::Maketext::Lexicon->import({ _decode => 1, %import });
134 foreach my $k (keys %{RT::I18N::} ) {
135 next if $k eq 'main::';
136 next unless index($k, '::', -2) >= 0;
137 next unless exists ${ 'RT::I18N::'. $k }{'Lexicon'};
139 my $lex = *{ ${'RT::I18N::'. $k }{'Lexicon'} }{HASH};
140 # run fetch to force load
141 my $tmp = $lex->{'foo'};
142 # XXX: untie may fail with "untie attempted
143 # while 1 inner references still exist"
144 # TODO: untie that has to lower fetch impact
145 # untie %$lex if tied %$lex;
151 Returns the encoding of the current lexicon. This is always 'utf-8'; the
152 "charset" field of __Content-Type is not consulted.
159 sub encoding { return 'utf-8'; }    # constant answer: arguments are ignored and no lexicon is inspected
161 # {{{ SetMIMEEntityToUTF8
163 =head2 SetMIMEEntityToUTF8 $entity
165 A utility function which will try to convert the entity body to UTF-8.
166 It is now just a wrapper around SetMIMEEntityToEncoding($entity, 'utf-8').
170 sub SetMIMEEntityToUTF8 {
171 RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
176 # {{{ IsTextualContentType
178 =head2 IsTextualContentType $type
180 A utility function that determines whether $type is I<textual>, meaning
181 that it can sensibly be converted to Unicode text.
183 Currently, it returns true iff $type matches this regular expression
184 (case-insensitively):
186 ^(?:text/(?:plain|html)|message/rfc822)\b
192 sub IsTextualContentType {
194 ($type =~ m{^(?:text/(?:plain|html)|message/rfc822)\b}i) ? 1 : 0;
197 # {{{ SetMIMEEntityToEncoding
199 =head2 SetMIMEEntityToEncoding $entity, $encoding
201 A utility function which will try to convert the entity body into the
202 specified charset encoding (encoded as octets, *not* unicode-strings). It
203 visits every part of $entity recursively and converts each one whose
204 Content-Type is textual (see IsTextualContentType) into that charset.
206 The following methods are tried in order:
207 1) to convert the entity to $encoding,
208 2) to interpret the entity as iso-8859-1 and then convert it to $encoding,
209 3) forcibly convert it to $encoding.
211 This function doesn't return anything meaningful.
215 sub SetMIMEEntityToEncoding {
216 my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
218 # do the same for parts first of all
219 SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
221 my $charset = _FindOrGuessCharset($entity) or return;
222 # one and only normalization
223 $charset = 'utf-8' if $charset =~ /^utf-?8$/i;
224 $enc = 'utf-8' if $enc =~ /^utf-?8$/i;
226 SetMIMEHeadToEncoding(
228 _FindOrGuessCharset($entity, 1) => $enc,
232 my $head = $entity->head;
234 # convert at least MIME word encoded attachment filename
235 foreach my $attr (qw(content-type.name content-disposition.filename)) {
236 if ( my $name = $head->mime_attr($attr) and !$preserve_words ) {
237 $head->mime_attr( $attr => DecodeMIMEWordsToUTF8($name) );
241 # If this is a textual entity, we'd need to preserve its original encoding
242 $head->replace( "X-RT-Original-Encoding" => $charset )
243 if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
245 return unless IsTextualContentType($head->mime_type);
247 my $body = $entity->bodyhandle;
249 if ( $enc ne $charset && $body ) {
250 my $string = $body->as_string or return;
251 # NOTE:: see the comments at the end of the sub.
252 Encode::_utf8_off($string);
253 my $orig_string = $string;
255 # {{{ Convert the body
257 $RT::Logger->debug( "Converting '$charset' to '$enc' for "
258 . $head->mime_type . " - "
259 . ( $head->get('subject') || 'Subjectless message' ) );
260 Encode::from_to( $string, $charset => $enc, Encode::FB_CROAK );
264 $RT::Logger->error( "Encoding error: "
266 . " falling back to iso-8859-1 => $enc" );
267 $string = $orig_string;
271 'iso-8859-1' => $enc,
276 $RT::Logger->error( "Encoding error: "
278 . " forcing conversion to $charset => $enc" );
279 $string = $orig_string;
280 Encode::from_to( $string, $charset => $enc );
286 my $new_body = MIME::Body::InCore->new($string);
288 # set up the new entity
289 $head->mime_attr( "content-type" => 'text/plain' )
290 unless ( $head->mime_attr("content-type") );
291 $head->mime_attr( "content-type.charset" => $enc );
292 $entity->bodyhandle($new_body);
296 # NOTES: Why Encode::_utf8_off before Encode::from_to
298 # All the strings in RT are utf-8 now. Quotes from Encode POD:
300 # [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
301 # ... The data in $octets must be encoded as octets and not as
302 # characters in Perl's internal format. ...
304 # Not turning off the UTF-8 flag in the string will prevent the string
309 # {{{ DecodeMIMEWordsToUTF8
311 =head2 DecodeMIMEWordsToUTF8 $raw
313 A utility function which mimics MIME::Words::decode_mimewords, but with only
314 limited functionality. This function returns a UTF-8 string.
316 It returns the decoded string, or the original string if it's not
317 encoded. Since the subroutine converts specified string into utf-8
318 charset, it should not alter a subject written in English.
320 Why not use MIME::Words directly? Because it fails in RT when I
321 tried. Maybe it's ok now.
325 sub DecodeMIMEWordsToUTF8 {
327 return DecodeMIMEWordsToEncoding($str, 'utf-8', @_);
330 sub DecodeMIMEWordsToEncoding {
332 my $to_charset = shift;
333 my $field = shift || '';
335 my @list = $str =~ m/(.*?)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/gcs;
336 return ($str) unless (@list);
338 # Append everything that hasn't matched to the end of the last string in
339 # the array; this happens when we have 'key="=?encoded?="; key="plain"'.
340 $list[-1] .= substr($str, pos $str);
344 my ($prefix, $charset, $encoding, $enc_str, $trailing) =
346 $encoding = lc $encoding;
348 $trailing =~ s/\s?\t?$//; # Observed from Outlook Express
350 if ( $encoding eq 'q' ) {
351 use MIME::QuotedPrint;
352 $enc_str =~ tr/_/ /; # Observed from Outlook Express
353 $enc_str = decode_qp($enc_str);
354 } elsif ( $encoding eq 'b' ) {
356 $enc_str = decode_base64($enc_str);
358 $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
359 ."only Q(uoted-printable) and B(ase64) are supported");
362 # now we have got a decoded subject, try to convert into the encoding
363 unless ( $charset eq $to_charset ) {
364 my $orig_str = $enc_str;
365 eval { Encode::from_to( $enc_str, $charset, $to_charset, Encode::FB_CROAK ) };
367 $enc_str = $orig_str;
368 $charset = _GuessCharset( $enc_str );
369 Encode::from_to( $enc_str, $charset, $to_charset );
373 # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
374 # We _should_ be preserving them encoded until after parsing is completed and
375 # THEN undo the mime-encoding.
377 # This routine should be translating the existing mimeencoding to utf8 but leaving
380 # It's legal for headers to contain mime-encoded commas and semicolons which
381 # should not be treated as address separators. (Encoding == quoting here)
383 # until this is fixed, we must escape any string containing a comma or semicolon
384 # this is only a bandaid
386 # Some _other_ MUAs encode quotes _already_, and double quotes
387 # confuse us a lot, so only quote it if it isn't quoted
389 $enc_str = qq{"$enc_str"}
390 if $enc_str =~ /[,;]/
391 and $enc_str !~ /^".*"$/
392 and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i);
394 $str .= $prefix . $enc_str . $trailing;
397 # We might have \n without trailing whitespace, which will result in
406 # {{{ _FindOrGuessCharset
408 =head2 _FindOrGuessCharset MIME::Entity, $head_only
410 When handed a MIME::Entity, this will first attempt to read which charset the message is encoded in. Failing that, it will use Encode::Guess to try to figure it out.
412 If $head_only is true, it only guesses the charset of the head. This is because the header's encoding (e.g. filename="...") may differ from the body's.
416 sub _FindOrGuessCharset {
418 my $head_only = shift;
419 my $head = $entity->head;
421 if ( my $charset = $head->mime_attr("content-type.charset") ) {
425 if ( !$head_only and $head->mime_type =~ m{^text/}) {
426 my $body = $entity->bodyhandle or return;
427 return _GuessCharset( $body->as_string );
430 # potentially binary data -- don't guess the body
431 return _GuessCharset( $head->as_string );
439 =head2 _GuessCharset STRING
441 Uses Encode::Guess to try to figure out the string's encoding.
446 my $fallback = 'iso-8859-1';
448 # if $_[0] is null/empty, we don't guess its encoding
449 return $fallback unless defined $_[0] && length $_[0];
452 my @encodings = RT->Config->Get('EmailInputEncodings');
453 if ( @encodings and eval { require Encode::Guess; 1 } ) {
454 Encode::Guess->set_suspects( @encodings );
455 my $decoder = Encode::Guess->guess( $_[0] );
457 if ( defined($decoder) ) {
458 if ( ref $decoder ) {
459 $charset = $decoder->name;
460 $RT::Logger->debug("Guessed encoding: $charset");
463 elsif ($decoder =~ /(\S+ or .+)/) {
464 my %matched = map { $_ => 1 } split(/ or /, $1);
465 return 'utf-8' if $matched{'utf8'}; # one and only normalization
467 foreach my $suspect (RT->Config->Get('EmailInputEncodings')) {
468 next unless $matched{$suspect};
469 $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
475 $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
479 $RT::Logger->warning("Encode::Guess failed: decoder is undefined; fallback to $fallback");
482 elsif ( @encodings && $@ ) {
483 $RT::Logger->error("You have set EmailInputEncodings, but we couldn't load Encode::Guess: $@");
485 $RT::Logger->warning("No EmailInputEncodings set, fallback to $fallback");
488 return ($charset || $fallback);
493 # {{{ SetMIMEHeadToEncoding
495 =head2 SetMIMEHeadToEncoding HEAD OLD_CHARSET NEW_CHARSET
497 Converts a MIME Head from one encoding to another. This totally violates the RFC.
498 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
504 sub SetMIMEHeadToEncoding {
505 my ( $head, $charset, $enc, $preserve_words ) = ( shift, shift, shift, shift );
507 $charset = 'utf-8' if $charset eq 'utf8';
508 $enc = 'utf-8' if $enc eq 'utf8';
510 return if $charset eq $enc and $preserve_words;
512 foreach my $tag ( $head->tags ) {
513 next unless $tag; # seen in wild: headers with no name
514 my @values = $head->get_all($tag);
516 foreach my $value (@values) {
517 Encode::_utf8_off($value);
518 my $orig_value = $value;
519 if ( $charset ne $enc ) {
521 Encode::from_to( $value, $charset => $enc, Encode::FB_CROAK );
524 $RT::Logger->error( "Encoding error: "
526 . " falling back to iso-8859-1 => $enc" );
527 $value = $orig_value;
531 'iso-8859-1' => $enc,
536 $RT::Logger->error( "Encoding error: "
538 . " forcing conversion to $charset => $enc" );
539 $value = $orig_value;
540 Encode::from_to( $value, $charset => $enc );
544 $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
545 unless $preserve_words;
546 $head->add( $tag, $value );
553 RT::Base->_ImportOverlays();