X-Git-Url: http://git.freeside.biz/gitweb/?p=freeside.git;a=blobdiff_plain;f=rt%2Flib%2FRT%2FI18N.pm;h=bc267e43869d805b4f9094352dc7c7f68b99ac79;hp=971eaa1bda1ead978b61aa8c7b070cbc4959e8da;hb=e9e0cf0989259b94d9758eceff448666a2e5a5cc;hpb=43a06151e47d2c59b833cbd8c26d97865ee850b6 diff --git a/rt/lib/RT/I18N.pm b/rt/lib/RT/I18N.pm index 971eaa1bd..bc267e438 100644 --- a/rt/lib/RT/I18N.pm +++ b/rt/lib/RT/I18N.pm @@ -2,7 +2,7 @@ # # COPYRIGHT: # -# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC +# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC # # # (Except where explicitly superseded by other copyright notices) @@ -209,23 +209,27 @@ sub SetMIMEEntityToEncoding { # do the same for parts first of all SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts; - my $charset = _FindOrGuessCharset($entity) or return; + my $head = $entity->head; + + my $charset = _FindOrGuessCharset($entity); + if ( $charset ) { + unless( Encode::find_encoding($charset) ) { + $RT::Logger->warning("Encoding '$charset' is not supported"); + $charset = undef; + } + } + unless ( $charset ) { + $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') ); + $head->mime_attr('Content-Type' => 'application/octet-stream'); + return; + } SetMIMEHeadToEncoding( - $entity->head, + $head, _FindOrGuessCharset($entity, 1) => $enc, $preserve_words ); - my $head = $entity->head; - - # convert at least MIME word encoded attachment filename - foreach my $attr (qw(content-type.name content-disposition.filename)) { - if ( my $name = $head->mime_attr($attr) and !$preserve_words ) { - $head->mime_attr( $attr => DecodeMIMEWordsToUTF8($name) ); - } - } - # If this is a textual entity, we'd need to preserve its original encoding $head->replace( "X-RT-Original-Encoding" => $charset ) if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type); @@ -234,7 +238,7 @@ sub SetMIMEEntityToEncoding { my $body = $entity->bodyhandle; - if ( $enc ne $charset && $body ) { + if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) { my $string = $body->as_string or return; $RT::Logger->debug( "Converting '$charset' to '$enc' for " @@ -292,7 +296,40 @@ sub DecodeMIMEWordsToEncoding { my $to_charset = _CanonicalizeCharset(shift); my $field = shift || ''; - my @list = $str =~ m/(.*?)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/gcs; + # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value + # continuations, and similar syntax from RFC 2231 + if ($field =~ /^Content-(Type|Disposition)/i) { + # This concatenates continued parameters and normalizes encoded params + # to QB encoded-words which we handle below + $str = MIME::Field::ParamVal->parse($str)->stringify; + } + + # Pre-parse by removing all whitespace between encoded words + my $encoded_word = qr/ + =\? # =? + ([^?]+?) # charset + (?:\*[^?]+)? # optional '*language' + \? # ? + ([QqBb]) # encoding + \? # ? + ([^?]+) # encoded string + \?= # ?= + /x; + $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g; + + # Also merge quoted-printable sections together, in case multiple + # octets of a single encoded character were split between chunks. + # Though not valid according to RFC 2047, this has been seen in the + # wild. + 1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i; + + # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our + # custom MIME word decoding and charset canonicalization. We can't do this + # until we parse before decode, instead of the other way around. + my @list = $str =~ m/(.*?) # prefix + $encoded_word + ([^=]*) # trailing + /xgcs; if ( @list ) { # add everything that hasn't matched to the end of the latest @@ -321,8 +358,15 @@ sub DecodeMIMEWordsToEncoding { } # now we have got a decoded subject, try to convert into the encoding - unless ( $charset eq $to_charset ) { - Encode::from_to( $enc_str, $charset, $to_charset ); + if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) { + if ( Encode::find_encoding($charset) ) { + Encode::from_to( $enc_str, $charset, $to_charset ); + } else { + $RT::Logger->warning("Charset '$charset' is not supported"); + $enc_str =~ s/[^[:print:]]/\357\277\275/g; + Encode::from_to( $enc_str, 'UTF-8', $to_charset ) + unless $to_charset eq 'utf-8'; + } } # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers @@ -344,33 +388,13 @@ sub DecodeMIMEWordsToEncoding { $enc_str = qq{"$enc_str"} if $enc_str =~ /[,;]/ and $enc_str !~ /^".*"$/ + and $prefix !~ /"$/ and $trailing !~ /^"/ and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i); $str .= $prefix . $enc_str . $trailing; } } -# handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, see also rfc 2231 - @list = $str =~ m/(.*?\*=)([^']*?)'([^']*?)'(\S+)(.*?)(?=(?:\*=|$))/gcs; - if (@list) { - $str = ''; - while (@list) { - my ( $prefix, $charset, $language, $enc_str, $trailing ) = - splice @list, 0, 5; - $prefix =~ s/\*=$/=/; # remove the * - $charset = _CanonicalizeCharset($charset); - $enc_str =~ s/%(\w{2})/chr hex $1/eg; - unless ( $charset eq $to_charset ) { - Encode::from_to( $enc_str, $charset, $to_charset ); - } - $enc_str = qq{"$enc_str"} - if $enc_str =~ /[,;]/ - and $enc_str !~ /^".*"$/ - and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i); - $str .= $prefix . $enc_str . $trailing; - } - } - # We might have \n without trailing whitespace, which will result in # invalid headers. $str =~ s/\n//g; @@ -544,7 +568,7 @@ sub SetMIMEHeadToEncoding { my @values = $head->get_all($tag); $head->delete($tag); foreach my $value (@values) { - if ( $charset ne $enc ) { + if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) { Encode::_utf8_off($value); Encode::from_to( $value, $charset => $enc ); }