rt/lib/RT/I18N.pm

   1 # BEGIN BPS TAGGED BLOCK {{{
   2 #
   3 # COPYRIGHT:
   4 #
   5 # This software is Copyright (c) 1996-2015 Best Practical Solutions, LLC
   6 #                                          <sales@bestpractical.com>
   7 #
   8 # (Except where explicitly superseded by other copyright notices)
   9 #
  10 #
  11 # LICENSE:
  12 #
  13 # This work is made available to you under the terms of Version 2 of
  14 # the GNU General Public License. A copy of that license should have
  15 # been provided with this software, but in any event can be snarfed
  16 # from www.gnu.org.
  17 #
  18 # This work is distributed in the hope that it will be useful, but
  19 # WITHOUT ANY WARRANTY; without even the implied warranty of
  20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 # General Public License for more details.
  22 #
  23 # You should have received a copy of the GNU General Public License
  24 # along with this program; if not, write to the Free Software
  25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  26 # 02110-1301 or visit their web page on the internet at
  27 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
  28 #
  29 #
  30 # CONTRIBUTION SUBMISSION POLICY:
  31 #
  32 # (The following paragraph is not intended to limit the rights granted
  33 # to you to modify and distribute this software under the terms of
  34 # the GNU General Public License and is only of importance to you if
  35 # you choose to contribute your changes and enhancements to the
  36 # community by submitting them to Best Practical Solutions, LLC.)
  37 #
  38 # By intentionally submitting any modifications, corrections or
  39 # derivatives to this work, or any other work intended for use with
  40 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
  41 # you are the copyright holder for those contributions and you grant
  42 # Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
  43 # royalty-free, perpetual, license to use, copy, create derivative
  44 # works based on those contributions, and sublicense and distribute
  45 # those contributions and any derivatives thereof.
  46 #
  47 # END BPS TAGGED BLOCK }}}
  48
  49 =head1 NAME
  50
  51 RT::I18N - a base class for localization of RT
  52
  53 =cut
  54
  55 package RT::I18N;
  56
  57 use strict;
  58 use warnings;
  59
  60
  61 use Locale::Maketext 1.04;
  62 use Locale::Maketext::Lexicon 0.25;
  63 use base 'Locale::Maketext::Fuzzy';
  64
  65 use MIME::Entity;
  66 use MIME::Head;
  67 use File::Glob;
  68
  69 # I decree that this project's first language is English.
  70
  71 our %Lexicon = (
  72    'TEST_STRING' => 'Concrete Mixer',
  73
  74     '__Content-Type' => 'text/plain; charset=utf-8',
  75
  76   '_AUTO' => 1,
  77   # That means that lookup failures can't happen -- if we get as far
  78   #  as looking for something in this lexicon, and we don't find it,
  79   #  then automagically set $Lexicon{$key} = $key, before possibly
  80   #  compiling it.
  81
  82   # The exception is keys that start with "_" -- they aren't auto-makeable.
  83
  84 );
  85 # End of lexicon.
  86
  87 =head2 Init
  88
  89 Initializes the lexicons used for localization.
  90
  91
  92 =cut
  93
  94 sub Init {
  95
  96     my @lang = RT->Config->Get('LexiconLanguages');
  97     @lang = ('*') unless @lang;
  98
  99     # load default functions
 100     require substr(__FILE__, 0, -3) . '/i_default.pm';
 101
 102     # Load language-specific functions
 103     foreach my $file ( File::Glob::bsd_glob(substr(__FILE__, 0, -3) . "/*.pm") ) {
 104         my ($lang) = ($file =~ /([^\\\/]+?)\.pm$/);
 105         next unless grep $_ eq '*' || $_ eq $lang, @lang;
 106         require $file;
 107     }
 108
 109     my %import;
 110     foreach my $l ( @lang ) {
 111         $import{$l} = [
 112             Gettext => $RT::LexiconPath."/$l.po",
 113         ];
 114         push @{ $import{$l} }, map {(Gettext => "$_/$l.po")} RT->PluginDirs('po');
 115         push @{ $import{$l} }, (Gettext => $RT::LocalLexiconPath."/*/$l.po",
 116                                 Gettext => $RT::LocalLexiconPath."/$l.po");
 117     }
 118
 119     # Acquire all .po files and iterate them into lexicons
 120     Locale::Maketext::Lexicon->import({ _decode => 1, %import });
 121
 122     return 1;
 123 }
 124
 125 sub LoadLexicons {
 126
 127     no strict 'refs';
 128     foreach my $k (keys %{RT::I18N::} ) {
 129         next if $k eq 'main::';
 130         next unless index($k, '::', -2) >= 0;
 131         next unless exists ${ 'RT::I18N::'. $k }{'Lexicon'};
 132
 133         my $lex = *{ ${'RT::I18N::'. $k }{'Lexicon'} }{HASH};
 134         # run fetch to force load
 135         my $tmp = $lex->{'foo'};
 136         # XXX: untie may fail with "untie attempted
 137         # while 1 inner references still exist"
 138         # TODO: untie that has to lower fetch impact
 139         # untie %$lex if tied %$lex;
 140     }
 141 }
 142
 143 =head2 encoding
 144
  145 Returns the encoding of the current lexicon.  This is always 'utf-8', matching
  146 the charset declared in the base lexicon's __Content-Type entry.
 147
 148
 149
 150 =cut
 151
 152
 153 sub encoding { 'utf-8' }
 154
 155
 156 =head2 SetMIMEEntityToUTF8 $entity
 157
  158 A utility function which will try to convert the entity body into utf8.
  159 It is now a wrapper around SetMIMEEntityToEncoding($entity, 'utf-8').
 160
 161 =cut
 162
 163 sub SetMIMEEntityToUTF8 {
 164     RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
 165 }
 166
 167
 168
 169 =head2 IsTextualContentType $type
 170
  171 A utility function that determines whether $type is I<textual>, meaning
 172 that it can sensibly be converted to Unicode text.
 173
 174 Currently, it returns true iff $type matches this regular expression
 175 (case-insensitively):
 176
 177     ^(?:text/(?:plain|html)|message/rfc822)\b
 178
 179
 180 =cut
 181
 182 sub IsTextualContentType {
 183     my $type = shift;
 184     ($type =~ m{^(?:text/(?:plain|html)|message/rfc822)\b}i) ? 1 : 0;
 185 }
 186
 187
 188 =head2 SetMIMEEntityToEncoding Entity => ENTITY, Encoding => ENCODING, PreserveWords => BOOL, IsOut => BOOL
 189
  190 A utility function which will try to convert the entity body into the specified
  191 charset encoding (encoded as octets, *not* unicode-strings).  It will
  192 iterate over all the entities in $entity, and try to convert each one into the
  193 specified charset if its Content-Type is textual (see IsTextualContentType).
  194
  195 If PreserveWords is true, MIME-encoded words in the mime head are left encoded rather than decoded (default is false).
  196
  197 Incoming and outgoing mails are handled differently: if IsOut is true (default
  198 is false), the entity is treated as outgoing mail, otherwise as incoming mail:
 199
 200 incoming mail:
 201 1) find encoding
 202 2) if found then try to convert to utf-8 in croak mode, return if success
 203 3) guess encoding
 204 4) if guessed differently then try to convert to utf-8 in croak mode, return
 205    if success
 206 5) mark part as application/octet-stream instead of falling back to any
 207    encoding
 208
 209 outgoing mail:
 210 1) find encoding
 211 2) if didn't find then do nothing, send as is, let MUA deal with it
 212 3) if found then try to convert it to outgoing encoding in croak mode, return
 213    if success
 214 4) do nothing otherwise, keep original encoding
 215
 216 This function doesn't return anything meaningful.
 217
 218 =cut
 219
  220 sub SetMIMEEntityToEncoding {
          # Converts the head and, for textual parts, the body of $entity
          # (and, recursively, of all of its parts) to the encoding $enc.
          #
          # Arguments are either positional -- ( $entity, $enc,
          # $preserve_words ) -- or named: Entity, Encoding, PreserveWords,
          # IsOut.  Returns nothing meaningful.
  221     my ( $entity, $enc, $preserve_words, $is_out );
  222
          # Three or fewer arguments are read positionally; IsOut can only
          # be supplied through the named form.
  223     if ( @_ <= 3 ) {
  224         ( $entity, $enc, $preserve_words ) = @_;
  225     }
  226     else {
  227         my %args = (
  228             Entity        => undef,
  229             Encoding      => undef,
  230             PreserveWords => undef,
  231             IsOut         => undef,
  232             @_,
  233         );
  234
  235         $entity         = $args{Entity};
  236         $enc            = $args{Encoding};
  237         $preserve_words = $args{PreserveWords};
  238         $is_out         = $args{IsOut};
  239     }
  240
  241     unless ( $entity && $enc ) {
  242         RT->Logger->error("Missing Entity or Encoding arguments");
  243         return;
  244     }
  245
  246     # do the same for parts first of all
  247     SetMIMEEntityToEncoding(
  248         Entity        => $_,
  249         Encoding      => $enc,
  250         PreserveWords => $preserve_words,
  251         IsOut         => $is_out,
  252     ) foreach $entity->parts;
  253
  254     my $head = $entity->head;
  255
          # Determine the source charset; one that Encode does not know is
          # treated the same as no charset at all.
  256     my $charset = _FindOrGuessCharset($entity);
  257     if ( $charset ) {
  258         unless( Encode::find_encoding($charset) ) {
  259             $RT::Logger->warning("Encoding '$charset' is not supported");
  260             $charset = undef;
  261         }
  262     }
          # Without a usable charset, record the original Content-Type in
          # X-RT-Original-Content-Type and relabel the part as
          # application/octet-stream; neither head nor body is converted.
  263     unless ( $charset ) {
  264         $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') );
  265         $head->mime_attr('Content-Type' => 'application/octet-stream');
  266         return;
  267     }
  268
          # The head's source charset is looked up separately, in
          # head-only mode (second argument true).
  269     SetMIMEHeadToEncoding(
  270         Head          => $head,
  271         From          => _FindOrGuessCharset( $entity, 1 ),
  272         To            => $enc,
  273         PreserveWords => $preserve_words,
  274         IsOut         => $is_out,
  275     );
  276
  277     # If this is a textual entity, we'd need to preserve its original encoding
  278     $head->replace( "X-RT-Original-Encoding" => Encode::encode( "UTF-8", $charset ) )
  279         if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
  280
          # Only textual bodies are re-encoded.
  281     return unless IsTextualContentType($head->mime_type);
  282
  283     my $body = $entity->bodyhandle;
  284
          # Convert when source and target differ; a utf-8 target is
          # converted even when the source already claims the same charset.
  285     if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) {
              # A body whose string form is false (e.g. empty) is left alone.
  286         my $string = $body->as_string or return;
  287         RT::Util::assert_bytes($string);
  288
  289         $RT::Logger->debug( "Converting '$charset' to '$enc' for "
  290               . $head->mime_type . " - "
  291               . ( Encode::decode("UTF-8",$head->get('subject')) || 'Subjectless message' ) );
  292
  293         my $orig_string = $string;
              # On failure, $string holds the error message instead of the
              # converted text.
  294         ( my $success, $string ) = EncodeFromToWithCroak( $orig_string, $charset => $enc );
  295         if ( !$success ) {
                  # Outgoing mail keeps its original body when conversion fails.
  296             return if $is_out;
  297             my $error = $string;
  298
                  # Incoming mail: retry once with a guessed charset if the
                  # guess differs from the charset already tried.
  299             my $guess = _GuessCharset($orig_string);
  300             if ( $guess && $guess ne $charset ) {
  301                 $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
  302                 ( $success, $string ) = EncodeFromToWithCroak( $orig_string, $guess, $enc );
  303                 $error = $string unless $success;
  304             }
  305
                  # Still failing: relabel the part as binary and leave the
                  # body untouched.
  306             if ( !$success ) {
  307                 $RT::Logger->error( "Encoding error: " . $error . " falling back to application/octet-stream" );
  308                 $head->mime_attr( "content-type" => 'application/octet-stream' );
  309                 return;
  310             }
  311         }
  312
  313         my $new_body = MIME::Body::InCore->new($string);
  314
  315         # set up the new entity
  316         $head->mime_attr( "content-type" => 'text/plain' )
  317           unless ( $head->mime_attr("content-type") );
  318         $head->mime_attr( "content-type.charset" => $enc );
  319         $entity->bodyhandle($new_body);
  320     }
  321 }
 322
 323 =head2 DecodeMIMEWordsToUTF8 $raw
 324
  325 A utility method which mimics MIME::Words::decode_mimewords, but with only
  326 limited functionality.  Despite its name, this function returns the
 327 bytes of the string, in UTF-8.
 328
 329 =cut
 330
 331 sub DecodeMIMEWordsToUTF8 {
 332     my $str = shift;
 333     return DecodeMIMEWordsToEncoding($str, 'utf-8', @_);
 334 }
 335
 336 sub DecodeMIMEWordsToEncoding {
 337     my $str = shift;
 338     my $to_charset = _CanonicalizeCharset(shift);
 339     my $field = shift || '';
 340     $RT::Logger->warning(
 341         "DecodeMIMEWordsToEncoding was called without field name."
 342         ."It's known to cause troubles with decoding fields properly."
 343     ) unless $field;
 344
 345     # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
 346     # We _should_ be preserving them encoded until after parsing is completed and
 347     # THEN undo the mime-encoding.
 348     #
 349     # This routine should be translating the existing mimeencoding to utf8 but leaving
 350     # things encoded.
 351     #
 352     # It's legal for headers to contain mime-encoded commas and semicolons which
 353     # should not be treated as address separators. (Encoding == quoting here)
 354     #
 355     # until this is fixed, we must escape any string containing a comma or semicolon
 356     # this is only a bandaid
 357
 358     # Some _other_ MUAs encode quotes _already_, and double quotes
 359     # confuse us a lot, so only quote it if it isn't quoted
 360     # already.
 361
 362     # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
 363     # continuations, and similar syntax from RFC 2231
 364     if ($field =~ /^Content-/i) {
 365         # This concatenates continued parameters and normalizes encoded params
 366         # to QB encoded-words which we handle below
 367         my $params = MIME::Field::ParamVal->parse_params($str);
 368         foreach my $v ( values %$params ) {
 369             $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
 370             # de-quote in case those were hidden inside encoded part
 371             $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
 372         }
 373         $str = bless({}, 'MIME::Field::ParamVal')->set($params)->stringify;
 374     }
 375     elsif ( $field =~ /^(?:Resent-)?(?:To|From|B?Cc|Sender|Reply-To)$/i ) {
 376         my @addresses = RT::EmailParser->ParseEmailAddress( $str );
 377         foreach my $address ( @addresses ) {
 378             foreach my $field (qw(phrase comment)) {
 379                 my $v = $address->$field() or next;
 380                 $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
 381                 if ( $field eq 'phrase' ) {
 382                     # de-quote in case quoted value were hidden inside encoded part
 383                     $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
 384                 }
 385                 $address->$field($v);
 386             }
 387         }
 388         $str = join ', ', map $_->format, @addresses;
 389     }
 390     else {
 391         $str = _DecodeMIMEWordsToEncoding( $str, $to_charset );
 392     }
 393
 394
 395     # We might have \n without trailing whitespace, which will result in
 396     # invalid headers.
 397     $str =~ s/\n//g;
 398
 399     return ($str)
 400 }
 401
  402 sub _DecodeMIMEWordsToEncoding {
          # Decodes every MIME encoded-word ("=?charset?Q-or-B?text?=") in
          # $str and converts the decoded bytes to $to_charset.  Text
          # around the encoded-words is kept.  A string without any
          # encoded-word is returned unchanged.
  403     my $str = shift;
  404     my $to_charset = shift;
  405
  406     # Pre-parse by removing all whitespace between encoded words
  407     my $encoded_word = qr/
  408                  =\?            # =?
  409                  ([^?]+?)       # charset
  410                  (?:\*[^?]+)?   # optional '*language'
  411                  \?             # ?
  412                  ([QqBb])       # encoding
  413                  \?             # ?
  414                  ([^?]+)        # encoded string
  415                  \?=            # ?=
  416                  /x;
  417     $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g;
  418
  419     # Also merge quoted-printable sections together, in case multiple
  420     # octets of a single encoded character were split between chunks.
  421     # Though not valid according to RFC 2047, this has been seen in the
  422     # wild.
  423     1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i;
  424
  425     # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
  426     # custom MIME word decoding and charset canonicalization.  We can't do this
  427     # until we parse before decode, instead of the other way around.
          # Each match contributes five elements to @list: prefix, charset,
          # encoding, encoded string and trailing text.
  428     my @list = $str =~ m/(.*?)          # prefix
  429                          $encoded_word
  430                          ([^=]*)        # trailing
  431                         /xgcs;
  432     return $str unless @list;
  433
  434     # add everything that hasn't matched to the end of the latest
  435     # string in array this happen when we have 'key="=?encoded?="; key="plain"'
  436     $list[-1] .= substr($str, pos $str);
  437
          # From here on $str is the output buffer, rebuilt piece by piece.
  438     $str = '';
  439     while (@list) {
  440         my ($prefix, $charset, $encoding, $enc_str, $trailing) =
  441                 splice @list, 0, 5;
  442         $charset  = _CanonicalizeCharset($charset);
  443         $encoding = lc $encoding;
  444
  445         $trailing =~ s/\s?\t?$//;               # Observed from Outlook Express
  446
  447         if ( $encoding eq 'q' ) {
  448             use MIME::QuotedPrint;
  449             $enc_str =~ tr/_/ /;                # Observed from Outlook Express
  450             $enc_str = decode_qp($enc_str);
  451         } elsif ( $encoding eq 'b' ) {
  452             use MIME::Base64;
  453             $enc_str = decode_base64($enc_str);
  454         } else {
                  # NOTE(review): $encoded_word only captures Q/q/B/b, so this
                  # branch looks unreachable.  Also, $str was reset above and
                  # holds only the output built so far, not the original input.
  455             $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
  456                 ."only Q(uoted-printable) and B(ase64) are supported");
  457         }
  458
  459         # now we have got a decoded subject, try to convert into the encoding
  460         if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) {
  461             if ( Encode::find_encoding($charset) ) {
  462                 Encode::from_to( $enc_str, $charset, $to_charset );
  463             } else {
  464                 $RT::Logger->warning("Charset '$charset' is not supported");
                      # Unknown charset: every non-printable character is
                      # replaced with the bytes \357\277\275 (U+FFFD, the
                      # replacement character, encoded as UTF-8).
  465                 $enc_str =~ s/[^[:print:]]/\357\277\275/g;
  466                 Encode::from_to( $enc_str, 'UTF-8', $to_charset )
  467                     unless $to_charset eq 'utf-8';
  468             }
  469         }
  470         $str .= $prefix . $enc_str . $trailing;
  471     }
  472
  473     return ($str)
  474 }
 475
 476
 477 =head2 _FindOrGuessCharset MIME::Entity, $head_only
 478
  479 When handed a MIME::Entity, this will first attempt to read what charset the message is encoded in.  Failing that, it will use Encode::Guess to try to figure it out.
 480
 481 If $head_only is true, only guesses charset for head parts.  This is because header's encoding (e.g. filename="...") may be different from that of body's.
 482
 483 =cut
 484
 485 sub _FindOrGuessCharset {
 486     my $entity = shift;
 487     my $head_only = shift;
 488     my $head = $entity->head;
 489
 490     if ( my $charset = $head->mime_attr("content-type.charset") ) {
 491         return _CanonicalizeCharset($charset);
 492     }
 493
 494     if ( !$head_only and $head->mime_type =~ m{^text/} ) {
 495         my $body = $entity->bodyhandle or return;
 496         return _GuessCharset( $body->as_string );
 497     }
 498     else {
 499
 500         # potentially binary data -- don't guess the body
 501         return _GuessCharset( $head->as_string );
 502     }
 503 }
 504
 505
 506
 507 =head2 _GuessCharset STRING
 508
  509 Uses Encode::Detect::Detector (when '*' is listed first in EmailInputEncodings) and Encode::Guess to try to figure out the string's encoding.
 510
 511 =cut
 512
 513 use constant HAS_ENCODE_GUESS => Encode::Guess->require;
 514 use constant HAS_ENCODE_DETECT => Encode::Detect::Detector->require;
 515
 516 sub _GuessCharset {
 517     my $fallback = _CanonicalizeCharset('iso-8859-1');
 518
 519     # if $_[0] is null/empty, we don't guess its encoding
 520     return $fallback
 521         unless defined $_[0] && length $_[0];
 522
 523     my @encodings = RT->Config->Get('EmailInputEncodings');
 524     unless ( @encodings ) {
 525         $RT::Logger->warning("No EmailInputEncodings set, fallback to $fallback");
 526         return $fallback;
 527     }
 528
 529     if ( $encodings[0] eq '*' ) {
 530         shift @encodings;
 531         if ( HAS_ENCODE_DETECT ) {
 532             my $charset = Encode::Detect::Detector::detect( $_[0] );
 533             if ( $charset ) {
 534                 $RT::Logger->debug("Encode::Detect::Detector guessed encoding: $charset");
 535                 return _CanonicalizeCharset( Encode::resolve_alias( $charset ) );
 536             }
 537             else {
 538                 $RT::Logger->debug("Encode::Detect::Detector failed to guess encoding");
 539             }
 540         }
 541         else {
 542             $RT::Logger->error(
 543                 "You requested to guess encoding, but we couldn't"
 544                 ." load Encode::Detect::Detector module"
 545             );
 546         }
 547     }
 548
 549     unless ( @encodings ) {
 550         $RT::Logger->warning("No EmailInputEncodings set except '*', fallback to $fallback");
 551         return $fallback;
 552     }
 553
 554     unless ( HAS_ENCODE_GUESS ) {
 555         $RT::Logger->error("We couldn't load Encode::Guess module, fallback to $fallback");
 556         return $fallback;
 557     }
 558
 559     Encode::Guess->set_suspects( @encodings );
 560     my $decoder = Encode::Guess->guess( $_[0] );
 561     unless ( defined $decoder ) {
 562         $RT::Logger->warning("Encode::Guess failed: decoder is undefined; fallback to $fallback");
 563         return $fallback;
 564     }
 565
 566     if ( ref $decoder ) {
 567         my $charset = $decoder->name;
 568         $RT::Logger->debug("Encode::Guess guessed encoding: $charset");
 569         return _CanonicalizeCharset( $charset );
 570     }
 571     elsif ($decoder =~ /(\S+ or .+)/) {
 572         my %matched = map { $_ => 1 } split(/ or /, $1);
 573         return 'utf-8' if $matched{'utf8'}; # one and only normalization
 574
 575         foreach my $suspect (RT->Config->Get('EmailInputEncodings')) {
 576             next unless $matched{$suspect};
 577             $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
 578             return _CanonicalizeCharset( $suspect );
 579         }
 580     }
 581     else {
 582         $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
 583     }
 584
 585     return $fallback;
 586 }
 587
 588 =head2 _CanonicalizeCharset NAME
 589
 590 canonicalize charset, return lowercase version.
 591 special cases are: gb2312 => gbk, utf8 => utf-8
 592
 593 =cut
 594
 595 sub _CanonicalizeCharset {
 596     my $charset = lc shift;
 597     return $charset unless $charset;
 598
 599     # Canonicalize aliases if they're known
 600     if (my $canonical = Encode::resolve_alias($charset)) {
 601         $charset = $canonical;
 602     }
 603
 604     if ( $charset eq 'utf8' || $charset eq 'utf-8-strict' ) {
 605         return 'utf-8';
 606     }
 607     elsif ( $charset eq 'euc-cn' ) {
 608         # gbk is superset of gb2312/euc-cn so it's safe
 609         return 'gbk';
 610     }
 611     elsif ( $charset =~ /^(?:(?:big5(-1984|-2003|ext|plus))|cccii|unisys|euc-tw|gb18030|(?:cns11643-\d+))$/ ) {
 612         unless ( Encode::HanExtra->require ) {
 613             RT->Logger->error("Please install Encode::HanExtra to handle $charset");
 614         }
 615         return $charset;
 616     }
 617     else {
 618         return $charset;
 619     }
 620 }
 621
 622
  623 =head2 SetMIMEHeadToEncoding Head => HEAD, From => OLD_ENCODING, To => NEW_Encoding, PreserveWords => BOOL, IsOut => BOOL
  624
  625 Converts a MIME Head from one encoding to another. This totally violates the RFC.
  626 We should never need this. But, surprise! MUAs are badly broken and do this kind of stuff
  627 all the time.
 628
 629
 630 =cut
 631
  632 sub SetMIMEHeadToEncoding {
          # Re-encodes every header value of $head from $charset to $enc
          # and, unless $preserve_words is true, decodes MIME encoded-words
          # in the values as well.
          #
          # Arguments are either positional -- ( $head, $charset, $enc,
          # $preserve_words ) -- or named: Head, From, To, PreserveWords,
          # IsOut.
  633     my ( $head, $charset, $enc, $preserve_words, $is_out );
  634
          # Four or fewer arguments are read positionally; IsOut can only
          # be supplied through the named form.
  635     if ( @_ <= 4 ) {
  636         ( $head, $charset, $enc, $preserve_words ) = @_;
  637     }
  638     else {
  639         my %args = (
  640             Head      => undef,
  641             From          => undef,
  642             To            => undef,
  643             PreserveWords => undef,
  644             IsOut         => undef,
  645             @_,
  646         );
  647
  648         $head           = $args{Head};
  649         $charset        = $args{From};
  650         $enc            = $args{To};
  651         $preserve_words = $args{PreserveWords};
  652         $is_out         = $args{IsOut};
  653     }
  654
  655     unless ( $head && $charset && $enc ) {
  656         RT->Logger->error(
  657             "Missing Head or From or To arguments");
  658         return;
  659     }
  660
  661     $charset = _CanonicalizeCharset($charset);
  662     $enc     = _CanonicalizeCharset($enc);
  663
          # Same charset and nothing to decode: the head is left untouched.
  664     return if $charset eq $enc and $preserve_words;
  665
  666     RT::Util::assert_bytes( $head->as_string );
  667     foreach my $tag ( $head->tags ) {
  668         next unless $tag; # seen in wild: headers with no name
              # All values of the tag are removed and then re-added one by
              # one after conversion.
  669         my @values = $head->get_all($tag);
  670         $head->delete($tag);
  671         foreach my $value (@values) {
                  # Convert when source and target differ; a utf-8 target is
                  # converted even when the charsets are the same.
  672             if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) {
  673                 my $orig_value = $value;
                      # On failure, $value holds the error message.
  674                 ( my $success, $value ) = EncodeFromToWithCroak( $orig_value, $charset => $enc );
  675                 if ( !$success ) {
  676                     my $error = $value;
                          # Outgoing mail: the original value is put back as
                          # is and MIME-word decoding is skipped for it.
                          # NOTE(review): unlike the add() at the end of the
                          # loop, no leading space is prepended here --
                          # confirm that is intended.
  677                     if ($is_out) {
  678                         $value = $orig_value;
  679                         $head->add( $tag, $value );
  680                         next;
  681                     }
  682
                          # Incoming mail: retry once with a guessed charset
                          # if the guess differs from the one already tried.
  683                     my $guess = _GuessCharset($orig_value);
  684                     if ( $guess && $guess ne $charset ) {
  685                         $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
  686                         ( $success, $value ) = EncodeFromToWithCroak( $orig_value, $guess, $enc );
  687                         $error = $value unless $success;
  688                     }
  689
                          # Last resort: convert in place with Encode::from_to,
                          # called without a check argument.
  690                     if ( !$success ) {
  691                         $RT::Logger->error( "Encoding error: " . $error . " forcing conversion to $charset => $enc" );
  692                         $value = $orig_value;
  693                         Encode::from_to( $value, $charset => $enc );
  694                     }
  695                 }
  696             }
  697
  698             $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
  699                 unless $preserve_words;
  700
  701             # We intentionally add a leading space when re-adding the
  702             # header; Mail::Header strips it before storing, but it
  703             # serves to prevent it from "helpfully" canonicalizing
  704             # $head->add("Subject", "Subject: foo") into the same as
  705             # $head->add("Subject", "foo");
  706             $head->add( $tag, " " . $value );
  707         }
  708     }
  709
  710 }
 711
 712 =head2 EncodeFromToWithCroak $string, $from, $to
 713
  714 Tries to convert $string from encoding $from to encoding $to, encoding in croak mode.
  715
  716 Returns (1, $encoded_string) on success, otherwise (0, $error).
 717
 718 =cut
 719
 720 sub EncodeFromToWithCroak {
 721     my $string = shift;
 722     my $from   = shift;
 723     my $to     = shift;
 724
 725     eval {
 726         no warnings 'utf8';
 727         $string = Encode::encode( $to, Encode::decode( $from, $string ), Encode::FB_CROAK );
 728     };
 729     return $@ ? ( 0, $@ ) : ( 1, $string );
 730 }
 731
  # NOTE(review): presumably pulls in overlay files that extend this package -- confirm against RT::Base.
  732 RT::Base->_ImportOverlays();
  733
  # The module must end with a true value so that loading it succeeds.
  734 1;  # End of module.
 735