- my $enc = shift;
-
-
- @_ = $str =~ m/([^=]*)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/g;
-
- return ($str) unless (@_);
-
- $str = "";
- while (@_) {
- my ($prefix, $charset, $encoding, $enc_str, $trailing) =
- (shift, shift, shift, shift, shift);
-
- $trailing =~ s/\s?\t?$//; # Observed from Outlook Express
-
- if ($encoding eq 'Q' or $encoding eq 'q') {
- use MIME::QuotedPrint;
- $enc_str =~ tr/_/ /; # Observed from Outlook Express
- $enc_str = decode_qp($enc_str);
- } elsif ($encoding eq 'B' or $encoding eq 'b') {
- use MIME::Base64;
- $enc_str = decode_base64($enc_str);
- } else {
- $RT::Logger->warning("RT::I18N::DecodeMIMEWordsToCharset got a " .
- "strange encoding: $encoding.");
- }
-
- # now we have got a decoded subject, try to convert into the encoding
- unless ($charset eq $enc) {
- eval { Encode::from_to($enc_str, $charset, $enc) };
- if ($@) {
- $charset = _GuessCharset( $enc_str );
- Encode::from_to($enc_str, $charset, $enc);
- }
- }
-
- $str .= $prefix . $enc_str . $trailing;
+ my $to_charset = _CanonicalizeCharset(shift);
+ my $field = shift || '';
+
+ # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
+ # continuations, and similar syntax from RFC 2231
+ if ($field =~ /^Content-(Type|Disposition)/i) {
+ # This concatenates continued parameters and normalizes encoded params
+ # to QB encoded-words which we handle below
+ $str = MIME::Field::ParamVal->parse($str)->stringify;
+ }
+
+ # Pre-parse by removing all whitespace between encoded words
+ my $encoded_word = qr/
+ =\? # =?
+ ([^?]+?) # charset
+ (?:\*[^?]+)? # optional '*language'
+ \? # ?
+ ([QqBb]) # encoding
+ \? # ?
+ ([^?]+) # encoded string
+ \?= # ?=
+ /x;
+ $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g;
+
+ # Also merge quoted-printable sections together, in case multiple
+ # octets of a single encoded character were split between chunks.
+ # Though not valid according to RFC 2047, this has been seen in the
+ # wild.
+ 1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i;
+
+ # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
+ # custom MIME word decoding and charset canonicalization. We can't do this
+ # until we parse before decode, instead of the other way around.
+ my @list = $str =~ m/(.*?) # prefix
+ $encoded_word
+ ([^=]*) # trailing
+ /xgcs;
+
+ if ( @list ) {
+ # add everything that hasn't matched to the end of the latest
+ # string in array this happen when we have 'key="=?encoded?="; key="plain"'
+ $list[-1] .= substr($str, pos $str);
+
+ $str = "";
+ while (@list) {
+ my ($prefix, $charset, $encoding, $enc_str, $trailing) =
+ splice @list, 0, 5;
+ $charset = _CanonicalizeCharset($charset);
+ $encoding = lc $encoding;
+
+ $trailing =~ s/\s?\t?$//; # Observed from Outlook Express
+
+ if ( $encoding eq 'q' ) {
+ use MIME::QuotedPrint;
+ $enc_str =~ tr/_/ /; # Observed from Outlook Express
+ $enc_str = decode_qp($enc_str);
+ } elsif ( $encoding eq 'b' ) {
+ use MIME::Base64;
+ $enc_str = decode_base64($enc_str);
+ } else {
+ $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
+ ."only Q(uoted-printable) and B(ase64) are supported");
+ }
+
+ # now we have got a decoded subject, try to convert into the encoding
+ if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) {
+ if ( Encode::find_encoding($charset) ) {
+ Encode::from_to( $enc_str, $charset, $to_charset );
+ } else {
+ $RT::Logger->warning("Charset '$charset' is not supported");
+ $enc_str =~ s/[^[:print:]]/\357\277\275/g;
+ Encode::from_to( $enc_str, 'UTF-8', $to_charset )
+ unless $to_charset eq 'utf-8';
+ }
+ }
+
+ # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
+ # We _should_ be preserving them encoded until after parsing is completed and
+ # THEN undo the mime-encoding.
+ #
+ # This routine should be translating the existing mimeencoding to utf8 but leaving
+ # things encoded.
+ #
+ # It's legal for headers to contain mime-encoded commas and semicolons which
+ # should not be treated as address separators. (Encoding == quoting here)
+ #
+ # until this is fixed, we must escape any string containing a comma or semicolon
+ # this is only a bandaid
+
+ # Some _other_ MUAs encode quotes _already_, and double quotes
+ # confuse us a lot, so only quote it if it isn't quoted
+ # already.
+ $enc_str = qq{"$enc_str"}
+ if $enc_str =~ /[,;]/
+ and $enc_str !~ /^".*"$/
+ and $prefix !~ /"$/ and $trailing !~ /^"/
+ and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i);
+
+ $str .= $prefix . $enc_str . $trailing;
+ }