summary | refs | log | tree | commit | diff
path: root/rt/lib/RT/I18N.pm
diff options
context:
space:
mode:
author: Ivan Kohler <ivan@freeside.biz> 2013-06-04 00:21:24 -0700
committer: Ivan Kohler <ivan@freeside.biz> 2013-06-04 00:21:24 -0700
commit: 679854b8bbc65d112071111bbd7f34a6a481fb30 (patch)
tree: dda0862fdf7853f4f61e4cf155c8bbc93768c994 /rt/lib/RT/I18N.pm
parent: 9b328d940af56b9924a342192ebb0790478fa705 (diff)
RT 4.0.13
Diffstat (limited to 'rt/lib/RT/I18N.pm')
-rw-r--r-- rt/lib/RT/I18N.pm 58
1 files changed, 44 insertions, 14 deletions
diff --git a/rt/lib/RT/I18N.pm b/rt/lib/RT/I18N.pm
index e453cfa04..0e75b9f3b 100644
--- a/rt/lib/RT/I18N.pm
+++ b/rt/lib/RT/I18N.pm
@@ -2,7 +2,7 @@
#
# COPYRIGHT:
#
-# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC
+# This software is Copyright (c) 1996-2013 Best Practical Solutions, LLC
# <sales@bestpractical.com>
#
# (Except where explicitly superseded by other copyright notices)
@@ -209,16 +209,27 @@ sub SetMIMEEntityToEncoding {
# do the same for parts first of all
SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
- my $charset = _FindOrGuessCharset($entity) or return;
+ my $head = $entity->head;
+
+ my $charset = _FindOrGuessCharset($entity);
+ if ( $charset ) {
+ unless( Encode::find_encoding($charset) ) {
+ $RT::Logger->warning("Encoding '$charset' is not supported");
+ $charset = undef;
+ }
+ }
+ unless ( $charset ) {
+ $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') );
+ $head->mime_attr('Content-Type' => 'application/octet-stream');
+ return;
+ }
SetMIMEHeadToEncoding(
- $entity->head,
+ $head,
_FindOrGuessCharset($entity, 1) => $enc,
$preserve_words
);
- my $head = $entity->head;
-
# If this is a textual entity, we'd need to preserve its original encoding
$head->replace( "X-RT-Original-Encoding" => $charset )
if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
@@ -293,18 +304,30 @@ sub DecodeMIMEWordsToEncoding {
$str = MIME::Field::ParamVal->parse($str)->stringify;
}
+ # Pre-parse by removing all whitespace between encoded words
+ my $encoded_word = qr/
+ =\? # =?
+ ([^?]+?) # charset
+ (?:\*[^?]+)? # optional '*language'
+ \? # ?
+ ([QqBb]) # encoding
+ \? # ?
+ ([^?]+) # encoded string
+ \?= # ?=
+ /x;
+ $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g;
+
+ # Also merge quoted-printable sections together, in case multiple
+ # octets of a single encoded character were split between chunks.
+ # Though not valid according to RFC 2047, this has been seen in the
+ # wild.
+ 1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i;
+
# XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
# custom MIME word decoding and charset canonicalization. We can't do this
# until we parse before decode, instead of the other way around.
my @list = $str =~ m/(.*?) # prefix
- =\? # =?
- ([^?]+?) # charset
- (?:\*[^?]+)? # optional '*language'
- \? # ?
- ([QqBb]) # encoding
- \? # ?
- ([^?]+) # encoded string
- \?= # ?=
+ $encoded_word
([^=]*) # trailing
/xgcs;
@@ -336,7 +359,14 @@ sub DecodeMIMEWordsToEncoding {
# now we have got a decoded subject, try to convert into the encoding
if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) {
- Encode::from_to( $enc_str, $charset, $to_charset );
+ if ( Encode::find_encoding($charset) ) {
+ Encode::from_to( $enc_str, $charset, $to_charset );
+ } else {
+ $RT::Logger->warning("Charset '$charset' is not supported");
+ $enc_str =~ s/[^[:print:]]/\357\277\275/g;
+ Encode::from_to( $enc_str, 'UTF-8', $to_charset )
+ unless $to_charset eq 'utf-8';
+ }
}
# XXX TODO: RT doesn't currently do the right thing with mime-encoded headers