rt/lib/RT/I18N.pm

   1 # BEGIN BPS TAGGED BLOCK {{{
   2 #
   3 # COPYRIGHT:
   4 #
   5 # This software is Copyright (c) 1996-2019 Best Practical Solutions, LLC
   6 #                                          <sales@bestpractical.com>
   7 #
   8 # (Except where explicitly superseded by other copyright notices)
   9 #
  10 #
  11 # LICENSE:
  12 #
  13 # This work is made available to you under the terms of Version 2 of
  14 # the GNU General Public License. A copy of that license should have
  15 # been provided with this software, but in any event can be snarfed
  16 # from www.gnu.org.
  17 #
  18 # This work is distributed in the hope that it will be useful, but
  19 # WITHOUT ANY WARRANTY; without even the implied warranty of
  20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 # General Public License for more details.
  22 #
  23 # You should have received a copy of the GNU General Public License
  24 # along with this program; if not, write to the Free Software
  25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  26 # 02110-1301 or visit their web page on the internet at
  27 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
  28 #
  29 #
  30 # CONTRIBUTION SUBMISSION POLICY:
  31 #
  32 # (The following paragraph is not intended to limit the rights granted
  33 # to you to modify and distribute this software under the terms of
  34 # the GNU General Public License and is only of importance to you if
  35 # you choose to contribute your changes and enhancements to the
  36 # community by submitting them to Best Practical Solutions, LLC.)
  37 #
  38 # By intentionally submitting any modifications, corrections or
  39 # derivatives to this work, or any other work intended for use with
  40 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
  41 # you are the copyright holder for those contributions and you grant
  42 # Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
  43 # royalty-free, perpetual, license to use, copy, create derivative
  44 # works based on those contributions, and sublicense and distribute
  45 # those contributions and any derivatives thereof.
  46 #
  47 # END BPS TAGGED BLOCK }}}
  48
  49 =head1 NAME
  50
  51 RT::I18N - a base class for localization of RT
  52
  53 =cut
  54
  55 package RT::I18N;
  56
  57 use strict;
  58 use warnings;
  59 use Cwd ();
  60
  61
  62 use Locale::Maketext 1.04;
  63 use Locale::Maketext::Lexicon 0.25;
  64 use base 'Locale::Maketext::Fuzzy';
  65
  66 use MIME::Entity;
  67 use MIME::Head;
  68 use File::Glob;
  69
  70 # I decree that this project's first language is English.
  71
  72 our %Lexicon = (
  73    'TEST_STRING' => 'Concrete Mixer',
  74
  75     '__Content-Type' => 'text/plain; charset=utf-8',
  76
  77   '_AUTO' => 1,
  78   # That means that lookup failures can't happen -- if we get as far
  79   #  as looking for something in this lexicon, and we don't find it,
  80   #  then automagically set $Lexicon{$key} = $key, before possibly
  81   #  compiling it.
  82
  83   # The exception is keys that start with "_" -- they aren't auto-makeable.
  84
  85 );
  86 # End of lexicon.
  87
  88 =head2 Init
  89
  90 Initializes the lexicons used for localization.
  91
  92
  93 =cut
  94
  95 sub Init {
  96
  97     my @lang = RT->Config->Get('LexiconLanguages');
  98     @lang = ('*') unless @lang;
  99
 100     # load default functions
 101     require substr(Cwd::abs_path(__FILE__), 0, -3) . '/i_default.pm';
 102
 103     # Load language-specific functions
 104     foreach my $file ( File::Glob::bsd_glob(substr(Cwd::abs_path(__FILE__), 0, -3) . "/*.pm") ) {
 105         my ($lang) = ($file =~ /([^\\\/]+?)\.pm$/);
 106         next unless grep $_ eq '*' || $_ eq $lang, @lang;
 107         require $file;
 108     }
 109
 110     my %import;
 111     foreach my $l ( @lang ) {
 112         $import{$l} = [
 113             Gettext => $RT::LexiconPath."/$l.po",
 114         ];
 115         push @{ $import{$l} }, map {(Gettext => "$_/$l.po")} RT->PluginDirs('po');
 116         push @{ $import{$l} }, (Gettext => $RT::LocalLexiconPath."/*/$l.po",
 117                                 Gettext => $RT::LocalLexiconPath."/$l.po");
 118     }
 119
 120     # Acquire all .po files and iterate them into lexicons
 121     Locale::Maketext::Lexicon->import({ _decode => 1, %import });
 122
 123     return 1;
 124 }
 125
 126 sub LoadLexicons {
 127
 128     no strict 'refs';
 129     foreach my $k (keys %{RT::I18N::} ) {
 130         next if $k eq 'main::';
 131         next unless index($k, '::', -2) >= 0;
 132         next unless exists ${ 'RT::I18N::'. $k }{'Lexicon'};
 133
 134         my $lex = *{ ${'RT::I18N::'. $k }{'Lexicon'} }{HASH};
 135         # run fetch to force load
 136         my $tmp = $lex->{'foo'};
 137         # XXX: untie may fail with "untie attempted
 138         # while 1 inner references still exist"
 139         # TODO: untie that has to lower fetch impact
 140         # untie %$lex if tied %$lex;
 141     }
 142 }
 143
 144 =head2 encoding
 145
  146 Returns the encoding of the current lexicon.  This is always 'utf-8', which
  147 matches the charset declared in the base lexicon's __Content-Type entry.
 148
 149
 150
 151 =cut
 152
 153
 154 sub encoding { 'utf-8' }
 155
 156
 157 =head2 SetMIMEEntityToUTF8 $entity
 158
  159 A utility function which will try to convert the entity body into UTF-8.
  160 It is now a thin wrapper around SetMIMEEntityToEncoding($entity, 'utf-8').
 161
 162 =cut
 163
 164 sub SetMIMEEntityToUTF8 {
 165     RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
 166 }
 167
 168
 169
 170 =head2 IsTextualContentType $type
 171
  172 A utility function that determines whether $type is I<textual>, meaning
 173 that it can sensibly be converted to Unicode text.
 174
 175 Currently, it returns true iff $type matches this regular expression
 176 (case-insensitively):
 177
 178     ^(?:text/(?:plain|html)|message/rfc822)\b
 179
 180
 181 =cut
 182
 183 sub IsTextualContentType {
 184     my $type = shift;
 185     ($type =~ m{^(?:text/(?:plain|html)|message/rfc822)\b}i) ? 1 : 0;
 186 }
 187
 188
 189 =head2 SetMIMEEntityToEncoding Entity => ENTITY, Encoding => ENCODING, PreserveWords => BOOL, IsOut => BOOL
 190
  191 A utility function which will try to convert the entity body into the specified
  192 charset encoding (encoded as octets, *not* unicode-strings).  It will
  193 iterate over all the entities in $entity, and try to convert each one into
  194 the specified charset if its Content-Type is textual (see IsTextualContentType).
 195
  196 If PreserveWords is true, MIME-encoded words in the header values are left encoded; otherwise they are decoded (default is false).
 197
  198 Incoming and outgoing mails are handled differently: if IsOut is true (default
  199 is false), the entity is treated as outgoing mail, otherwise as incoming mail:
 200
 201 incoming mail:
 202 1) find encoding
 203 2) if found then try to convert to utf-8 in croak mode, return if success
 204 3) guess encoding
 205 4) if guessed differently then try to convert to utf-8 in croak mode, return
 206    if success
 207 5) mark part as application/octet-stream instead of falling back to any
 208    encoding
 209
 210 outgoing mail:
 211 1) find encoding
 212 2) if didn't find then do nothing, send as is, let MUA deal with it
 213 3) if found then try to convert it to outgoing encoding in croak mode, return
 214    if success
 215 4) do nothing otherwise, keep original encoding
 216
 217 This function doesn't return anything meaningful.
 218
 219 =cut
 220
 221 sub SetMIMEEntityToEncoding {
 222     my ( $entity, $enc, $preserve_words, $is_out );
 223
 224     if ( @_ <= 3 ) {
 225         ( $entity, $enc, $preserve_words ) = @_;
 226     }
 227     else {
 228         my %args = (
 229             Entity        => undef,
 230             Encoding      => undef,
 231             PreserveWords => undef,
 232             IsOut         => undef,
 233             @_,
 234         );
 235
 236         $entity         = $args{Entity};
 237         $enc            = $args{Encoding};
 238         $preserve_words = $args{PreserveWords};
 239         $is_out         = $args{IsOut};
 240     }
 241
 242     unless ( $entity && $enc ) {
 243         RT->Logger->error("Missing Entity or Encoding arguments");
 244         return;
 245     }
 246
 247     # do the same for parts first of all
 248     SetMIMEEntityToEncoding(
 249         Entity        => $_,
 250         Encoding      => $enc,
 251         PreserveWords => $preserve_words,
 252         IsOut         => $is_out,
 253     ) foreach $entity->parts;
 254
 255     my $head = $entity->head;
 256
 257     my $charset = _FindOrGuessCharset($entity);
 258     if ( $charset ) {
 259         unless( Encode::find_encoding($charset) ) {
 260             $RT::Logger->warning("Encoding '$charset' is not supported");
 261             $charset = undef;
 262         }
 263     }
 264     unless ( $charset ) {
 265         $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') );
 266         $head->mime_attr('Content-Type' => 'application/octet-stream');
 267         return;
 268     }
 269
 270     SetMIMEHeadToEncoding(
 271         Head          => $head,
 272         From          => _FindOrGuessCharset( $entity, 1 ),
 273         To            => $enc,
 274         PreserveWords => $preserve_words,
 275         IsOut         => $is_out,
 276     );
 277
 278     # If this is a textual entity, we'd need to preserve its original encoding
 279     $head->replace( "X-RT-Original-Encoding" => Encode::encode( "UTF-8", $charset ) )
 280         if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
 281
 282     return unless IsTextualContentType($head->mime_type);
 283
 284     my $body = $entity->bodyhandle;
 285
 286     if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) {
 287         my $string = $body->as_string or return;
 288         RT::Util::assert_bytes($string);
 289
 290         $RT::Logger->debug( "Converting '$charset' to '$enc' for "
 291               . $head->mime_type . " - "
 292               . ( Encode::decode("UTF-8",$head->get('subject')) || 'Subjectless message' ) );
 293
 294         my $orig_string = $string;
 295         ( my $success, $string ) = EncodeFromToWithCroak( $orig_string, $charset => $enc );
 296         if ( !$success ) {
 297             return if $is_out;
 298             my $error = $string;
 299
 300             my $guess = _GuessCharset($orig_string);
 301             if ( $guess && $guess ne $charset ) {
 302                 $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
 303                 ( $success, $string ) = EncodeFromToWithCroak( $orig_string, $guess, $enc );
 304                 $error = $string unless $success;
 305             }
 306
 307             if ( !$success ) {
 308                 $RT::Logger->error( "Encoding error: " . $error . " falling back to application/octet-stream" );
 309                 $head->mime_attr( "content-type" => 'application/octet-stream' );
 310                 return;
 311             }
 312         }
 313
 314         my $new_body = MIME::Body::InCore->new($string);
 315
 316         # set up the new entity
 317         $head->mime_attr( "content-type" => 'text/plain' )
 318           unless ( $head->mime_attr("content-type") );
 319         $head->mime_attr( "content-type.charset" => $enc );
 320         $entity->bodyhandle($new_body);
 321     }
 322 }
 323
 324 =head2 DecodeMIMEWordsToUTF8 $raw
 325
  326 A utility function which mimics MIME::Words::decode_mimewords, but with only
  327 limited functionality.  Despite its name, this function returns the
  328 bytes of the string, in UTF-8.
 329
 330 =cut
 331
 332 sub DecodeMIMEWordsToUTF8 {
 333     my $str = shift;
 334     return DecodeMIMEWordsToEncoding($str, 'utf-8', @_);
 335 }
 336
 337 sub DecodeMIMEWordsToEncoding {
 338     my $str = shift;
 339     my $to_charset = _CanonicalizeCharset(shift);
 340     my $field = shift || '';
 341     $RT::Logger->warning(
 342         "DecodeMIMEWordsToEncoding was called without field name."
 343         ."It's known to cause troubles with decoding fields properly."
 344     ) unless $field;
 345
 346     # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
 347     # We _should_ be preserving them encoded until after parsing is completed and
 348     # THEN undo the mime-encoding.
 349     #
 350     # This routine should be translating the existing mimeencoding to utf8 but leaving
 351     # things encoded.
 352     #
 353     # It's legal for headers to contain mime-encoded commas and semicolons which
 354     # should not be treated as address separators. (Encoding == quoting here)
 355     #
 356     # until this is fixed, we must escape any string containing a comma or semicolon
 357     # this is only a bandaid
 358
 359     # Some _other_ MUAs encode quotes _already_, and double quotes
 360     # confuse us a lot, so only quote it if it isn't quoted
 361     # already.
 362
 363     # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
 364     # continuations, and similar syntax from RFC 2231
 365     if ($field =~ /^Content-/i) {
 366         # This concatenates continued parameters and normalizes encoded params
 367         # to QB encoded-words which we handle below
 368         my $params = MIME::Field::ParamVal->parse_params($str);
 369         foreach my $v ( values %$params ) {
 370             $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
 371             # de-quote in case those were hidden inside encoded part
 372             $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
 373         }
 374         $str = bless({}, 'MIME::Field::ParamVal')->set($params)->stringify;
 375     }
 376     elsif ( $field =~ /^(?:Resent-)?(?:To|From|B?Cc|Sender|Reply-To)$/i ) {
 377         my @addresses = RT::EmailParser->ParseEmailAddress( $str );
 378         foreach my $address ( @addresses ) {
 379             foreach my $field (qw(phrase comment)) {
 380                 my $v = $address->$field() or next;
 381                 $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
 382                 if ( $field eq 'phrase' ) {
 383                     # de-quote in case quoted value were hidden inside encoded part
 384                     $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
 385                 }
 386                 $address->$field($v);
 387             }
 388         }
 389         $str = join ', ', map $_->format, @addresses;
 390     }
 391     else {
 392         $str = _DecodeMIMEWordsToEncoding( $str, $to_charset );
 393     }
 394
 395
 396     # We might have \n without trailing whitespace, which will result in
 397     # invalid headers.
 398     $str =~ s/\n//g;
 399
 400     return ($str)
 401 }
 402
 403 sub _DecodeMIMEWordsToEncoding {
 404     my $str = shift;
 405     my $to_charset = shift;
 406
 407     # Pre-parse by removing all whitespace between encoded words
 408     my $encoded_word = qr/
 409                  =\?            # =?
 410                  ([^?]+?)       # charset
 411                  (?:\*[^?]+)?   # optional '*language'
 412                  \?             # ?
 413                  ([QqBb])       # encoding
 414                  \?             # ?
 415                  ([^?]+)        # encoded string
 416                  \?=            # ?=
 417                  /x;
 418     $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g;
 419
 420     # Also merge quoted-printable sections together, in case multiple
 421     # octets of a single encoded character were split between chunks.
 422     # Though not valid according to RFC 2047, this has been seen in the
 423     # wild.
 424     1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i;
 425
 426     # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
 427     # custom MIME word decoding and charset canonicalization.  We can't do this
 428     # until we parse before decode, instead of the other way around.
 429     my @list = $str =~ m/(.*?)          # prefix
 430                          $encoded_word
 431                          ([^=]*)        # trailing
 432                         /xgcs;
 433     return $str unless @list;
 434
 435     # add everything that hasn't matched to the end of the latest
 436     # string in array this happen when we have 'key="=?encoded?="; key="plain"'
 437     $list[-1] .= substr($str, pos $str);
 438
 439     $str = '';
 440     while (@list) {
 441         my ($prefix, $charset, $encoding, $enc_str, $trailing) =
 442                 splice @list, 0, 5;
 443         $charset  = _CanonicalizeCharset($charset);
 444         $encoding = lc $encoding;
 445
 446         if ( $encoding eq 'q' ) {
 447             use MIME::QuotedPrint;
 448             $enc_str =~ tr/_/ /;              # RFC 2047, 4.2 (2)
 449             $enc_str = decode_qp($enc_str);
 450         } elsif ( $encoding eq 'b' ) {
 451             use MIME::Base64;
 452             $enc_str = decode_base64($enc_str);
 453         } else {
 454             $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
 455                 ."only Q(uoted-printable) and B(ase64) are supported");
 456         }
 457
 458         # now we have got a decoded subject, try to convert into the encoding
 459         if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) {
 460             if ( Encode::find_encoding($charset) ) {
 461                 Encode::from_to( $enc_str, $charset, $to_charset );
 462             } else {
 463                 $RT::Logger->warning("Charset '$charset' is not supported");
 464                 $enc_str =~ s/[^[:print:]]/\357\277\275/g;
 465                 Encode::from_to( $enc_str, 'UTF-8', $to_charset )
 466                     unless $to_charset eq 'utf-8';
 467             }
 468         }
 469         $str .= $prefix . $enc_str . $trailing;
 470     }
 471
 472     return ($str)
 473 }
 474
 475
 476 =head2 _FindOrGuessCharset MIME::Entity, $head_only
 477
 478 When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that, will use Encode::Guess to try to figure it out
 479
 480 If $head_only is true, only guesses charset for head parts.  This is because header's encoding (e.g. filename="...") may be different from that of body's.
 481
 482 =cut
 483
 484 sub _FindOrGuessCharset {
 485     my $entity = shift;
 486     my $head_only = shift;
 487     my $head = $entity->head;
 488
 489     if ( my $charset = $head->mime_attr("content-type.charset") ) {
 490         return _CanonicalizeCharset($charset);
 491     }
 492
 493     if ( !$head_only and $head->mime_type =~ m{^text/} ) {
 494         my $body = $entity->bodyhandle or return;
 495         return _GuessCharset( $body->as_string );
 496     }
 497     else {
 498
 499         # potentially binary data -- don't guess the body
 500         return _GuessCharset( $head->as_string );
 501     }
 502 }
 503
 504
 505
 506 =head2 _GuessCharset STRING
 507
  508 Uses Encode::Detect::Detector (when '*' is the first configured input encoding) and Encode::Guess to try to figure out the string's encoding.
 509
 510 =cut
 511
 512 use constant HAS_ENCODE_GUESS => Encode::Guess->require;
 513 use constant HAS_ENCODE_DETECT => Encode::Detect::Detector->require;
 514
 515 sub _GuessCharset {
 516     my $fallback = _CanonicalizeCharset('iso-8859-1');
 517
 518     # if $_[0] is null/empty, we don't guess its encoding
 519     return $fallback
 520         unless defined $_[0] && length $_[0];
 521
 522     my @encodings = RT->Config->Get('EmailInputEncodings');
 523     unless ( @encodings ) {
 524         $RT::Logger->warning("No EmailInputEncodings set, fallback to $fallback");
 525         return $fallback;
 526     }
 527
 528     if ( $encodings[0] eq '*' ) {
 529         shift @encodings;
 530         if ( HAS_ENCODE_DETECT ) {
 531             my $charset = Encode::Detect::Detector::detect( $_[0] );
 532             if ( $charset ) {
 533                 $RT::Logger->debug("Encode::Detect::Detector guessed encoding: $charset");
 534                 return _CanonicalizeCharset( Encode::resolve_alias( $charset ) );
 535             }
 536             else {
 537                 $RT::Logger->debug("Encode::Detect::Detector failed to guess encoding");
 538             }
 539         }
 540         else {
 541             $RT::Logger->error(
 542                 "You requested to guess encoding, but we couldn't"
 543                 ." load Encode::Detect::Detector module"
 544             );
 545         }
 546     }
 547
 548     unless ( @encodings ) {
 549         $RT::Logger->warning("No EmailInputEncodings set except '*', fallback to $fallback");
 550         return $fallback;
 551     }
 552
 553     unless ( HAS_ENCODE_GUESS ) {
 554         $RT::Logger->error("We couldn't load Encode::Guess module, fallback to $fallback");
 555         return $fallback;
 556     }
 557
 558     Encode::Guess->set_suspects( @encodings );
 559     my $decoder = Encode::Guess->guess( $_[0] );
 560     unless ( defined $decoder ) {
 561         $RT::Logger->warning("Encode::Guess failed: decoder is undefined; fallback to $fallback");
 562         return $fallback;
 563     }
 564
 565     if ( ref $decoder ) {
 566         my $charset = $decoder->name;
 567         $RT::Logger->debug("Encode::Guess guessed encoding: $charset");
 568         return _CanonicalizeCharset( $charset );
 569     }
 570     elsif ($decoder =~ /(\S+ or .+)/) {
 571         my %matched = map { $_ => 1 } split(/ or /, $1);
 572         return 'utf-8' if $matched{'utf8'}; # one and only normalization
 573
 574         foreach my $suspect (RT->Config->Get('EmailInputEncodings')) {
 575             next unless $matched{$suspect};
 576             $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
 577             return _CanonicalizeCharset( $suspect );
 578         }
 579     }
 580     else {
 581         $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
 582     }
 583
 584     return $fallback;
 585 }
 586
 587 =head2 _CanonicalizeCharset NAME
 588
 589 canonicalize charset, return lowercase version.
 590 special cases are: gb2312 => gbk, utf8 => utf-8
 591
 592 =cut
 593
 594 sub _CanonicalizeCharset {
 595     my $charset = lc shift;
 596     return $charset unless $charset;
 597
 598     # Canonicalize aliases if they're known
 599     if (my $canonical = Encode::resolve_alias($charset)) {
 600         $charset = $canonical;
 601     }
 602
 603     if ( $charset eq 'utf8' || $charset eq 'utf-8-strict' ) {
 604         return 'utf-8';
 605     }
 606     elsif ( $charset eq 'euc-cn' ) {
 607         # gbk is superset of gb2312/euc-cn so it's safe
 608         return 'gbk';
 609     }
 610     elsif ( $charset =~ /^(?:(?:big5(-1984|-2003|ext|plus))|cccii|unisys|euc-tw|gb18030|(?:cns11643-\d+))$/ ) {
 611         unless ( Encode::HanExtra->require ) {
 612             RT->Logger->error("Please install Encode::HanExtra to handle $charset");
 613         }
 614         return $charset;
 615     }
 616     else {
 617         return $charset;
 618     }
 619 }
 620
 621
  622 =head2 SetMIMEHeadToEncoding Head => HEAD, From => OLD_ENCODING, To => NEW_ENCODING, PreserveWords => BOOL, IsOut => BOOL
 623
 624 Converts a MIME Head from one encoding to another. This totally violates the RFC.
 625 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
 626 all the time
 627
 628
 629 =cut
 630
 631 sub SetMIMEHeadToEncoding {
 632     my ( $head, $charset, $enc, $preserve_words, $is_out );
 633
 634     if ( @_ <= 4 ) {
 635         ( $head, $charset, $enc, $preserve_words ) = @_;
 636     }
 637     else {
 638         my %args = (
 639             Head      => undef,
 640             From          => undef,
 641             To            => undef,
 642             PreserveWords => undef,
 643             IsOut         => undef,
 644             @_,
 645         );
 646
 647         $head           = $args{Head};
 648         $charset        = $args{From};
 649         $enc            = $args{To};
 650         $preserve_words = $args{PreserveWords};
 651         $is_out         = $args{IsOut};
 652     }
 653
 654     unless ( $head && $charset && $enc ) {
 655         RT->Logger->error(
 656             "Missing Head or From or To arguments");
 657         return;
 658     }
 659
 660     $charset = _CanonicalizeCharset($charset);
 661     $enc     = _CanonicalizeCharset($enc);
 662
 663     return if $charset eq $enc and $preserve_words;
 664
 665     RT::Util::assert_bytes( $head->as_string );
 666     foreach my $tag ( $head->tags ) {
 667         next unless $tag; # seen in wild: headers with no name
 668         my @values = $head->get_all($tag);
 669         $head->delete($tag);
 670         foreach my $value (@values) {
 671             if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) {
 672                 my $orig_value = $value;
 673                 ( my $success, $value ) = EncodeFromToWithCroak( $orig_value, $charset => $enc );
 674                 if ( !$success ) {
 675                     my $error = $value;
 676                     if ($is_out) {
 677                         $value = $orig_value;
 678                         $head->add( $tag, $value );
 679                         next;
 680                     }
 681
 682                     my $guess = _GuessCharset($orig_value);
 683                     if ( $guess && $guess ne $charset ) {
 684                         $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
 685                         ( $success, $value ) = EncodeFromToWithCroak( $orig_value, $guess, $enc );
 686                         $error = $value unless $success;
 687                     }
 688
 689                     if ( !$success ) {
 690                         $RT::Logger->error( "Encoding error: " . $error . " forcing conversion to $charset => $enc" );
 691                         $value = $orig_value;
 692                         Encode::from_to( $value, $charset => $enc );
 693                     }
 694                 }
 695             }
 696
 697             $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
 698                 unless $preserve_words;
 699
 700             # We intentionally add a leading space when re-adding the
 701             # header; Mail::Header strips it before storing, but it
 702             # serves to prevent it from "helpfully" canonicalizing
 703             # $head->add("Subject", "Subject: foo") into the same as
 704             # $head->add("Subject", "foo");
 705             $head->add( $tag, " " . $value );
 706         }
 707     }
 708
 709 }
 710
 711 =head2 EncodeFromToWithCroak $string, $from, $to
 712
 713 Try to encode string from encoding $from to encoding $to in croak mode
 714
 715 return (1, $encoded_string) if success, otherwise (0, $error)
 716
 717 =cut
 718
 719 sub EncodeFromToWithCroak {
 720     my $string = shift;
 721     my $from   = shift;
 722     my $to     = shift;
 723
 724     eval {
 725         no warnings 'utf8';
 726         $string = Encode::encode( $to, Encode::decode( $from, $string ), Encode::FB_CROAK );
 727     };
 728     return $@ ? ( 0, $@ ) : ( 1, $string );
 729 }
 730
 731 RT::Base->_ImportOverlays();
 732
 733 1;  # End of module.
 734