X-Git-Url: http://git.freeside.biz/gitweb/?p=freeside.git;a=blobdiff_plain;f=rt%2Flib%2FRT%2FI18N.pm;h=bc267e43869d805b4f9094352dc7c7f68b99ac79;hp=cadf7cc7c4a83dbbac4aa68d17468f41e59f8067;hb=e9e0cf0989259b94d9758eceff448666a2e5a5cc;hpb=85e677b86fc37c54e6de2b06340351a28f5a5916 diff --git a/rt/lib/RT/I18N.pm b/rt/lib/RT/I18N.pm index cadf7cc7c..bc267e438 100644 --- a/rt/lib/RT/I18N.pm +++ b/rt/lib/RT/I18N.pm @@ -2,7 +2,7 @@ # # COPYRIGHT: # -# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC +# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC # # # (Except where explicitly superseded by other copyright notices) @@ -209,16 +209,27 @@ sub SetMIMEEntityToEncoding { # do the same for parts first of all SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts; - my $charset = _FindOrGuessCharset($entity) or return; + my $head = $entity->head; + + my $charset = _FindOrGuessCharset($entity); + if ( $charset ) { + unless( Encode::find_encoding($charset) ) { + $RT::Logger->warning("Encoding '$charset' is not supported"); + $charset = undef; + } + } + unless ( $charset ) { + $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') ); + $head->mime_attr('Content-Type' => 'application/octet-stream'); + return; + } SetMIMEHeadToEncoding( - $entity->head, + $head, _FindOrGuessCharset($entity, 1) => $enc, $preserve_words ); - my $head = $entity->head; - # If this is a textual entity, we'd need to preserve its original encoding $head->replace( "X-RT-Original-Encoding" => $charset ) if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type); @@ -227,7 +238,7 @@ sub SetMIMEEntityToEncoding { my $body = $entity->bodyhandle; - if ( $enc ne $charset && $body ) { + if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) { my $string = $body->as_string or return; $RT::Logger->debug( "Converting '$charset' to '$enc' for " @@ -293,18 +304,30 @@ sub DecodeMIMEWordsToEncoding { $str = MIME::Field::ParamVal->parse($str)->stringify; } + # Pre-parse by removing all whitespace between encoded words + my $encoded_word = qr/ + =\? # =? + ([^?]+?) # charset + (?:\*[^?]+)? # optional '*language' + \? # ? + ([QqBb]) # encoding + \? # ? + ([^?]+) # encoded string + \?= # ?= + /x; + $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g; + + # Also merge quoted-printable sections together, in case multiple + # octets of a single encoded character were split between chunks. + # Though not valid according to RFC 2047, this has been seen in the + # wild. + 1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i; + # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our # custom MIME word decoding and charset canonicalization. We can't do this # until we parse before decode, instead of the other way around. my @list = $str =~ m/(.*?) # prefix - =\? # =? - ([^?]+?) # charset - (?:\*[^?]+)? # optional '*language' - \? # ? - ([QqBb]) # encoding - \? # ? - ([^?]+) # encoded string - \?= # ?= + $encoded_word ([^=]*) # trailing /xgcs; @@ -335,8 +358,15 @@ sub DecodeMIMEWordsToEncoding { } # now we have got a decoded subject, try to convert into the encoding - unless ( $charset eq $to_charset ) { - Encode::from_to( $enc_str, $charset, $to_charset ); + if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) { + if ( Encode::find_encoding($charset) ) { + Encode::from_to( $enc_str, $charset, $to_charset ); + } else { + $RT::Logger->warning("Charset '$charset' is not supported"); + $enc_str =~ s/[^[:print:]]/\357\277\275/g; + Encode::from_to( $enc_str, 'UTF-8', $to_charset ) + unless $to_charset eq 'utf-8'; + } } # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers @@ -358,6 +388,7 @@ sub DecodeMIMEWordsToEncoding { $enc_str = qq{"$enc_str"} if $enc_str =~ /[,;]/ and $enc_str !~ /^".*"$/ + and $prefix !~ /"$/ and $trailing !~ /^"/ and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i); $str .= $prefix . $enc_str . $trailing; @@ -537,7 +568,7 @@ sub SetMIMEHeadToEncoding { my @values = $head->get_all($tag); $head->delete($tag); foreach my $value (@values) { - if ( $charset ne $enc ) { + if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) { Encode::_utf8_off($value); Encode::from_to( $value, $charset => $enc ); }