# rt/t/mail/mime_decoding.t

   1 use strict;
   2 use warnings;
   3 use RT::Test nodb => 1, tests => 13;
   4
   5 use_ok('RT::I18N');
   6
   7 diag q{'=' char in a leading part before an encoded part};
   8 {
   9     my $str = 'key="plain"; key="=?UTF-8?B?0LzQvtC5X9GE0LDQudC7LmJpbg==?="';
  10     is(
  11         RT::I18N::DecodeMIMEWordsToUTF8($str),
  12         'key="plain"; key="мой_файл.bin"',
  13         "right decoding"
  14     );
  15 }
  16
  17 diag q{not compliant with standards, but MUAs send such field when attachment has non-ascii in name};
  18 {
  19     my $str = 'attachment; filename="=?UTF-8?B?0LzQvtC5X9GE0LDQudC7LmJpbg==?="';
  20     is(
  21         RT::I18N::DecodeMIMEWordsToUTF8($str),
  22         'attachment; filename="мой_файл.bin"',
  23         "right decoding"
  24     );
  25 }
  26
  27 diag q{'=' char in a trailing part after an encoded part};
  28 {
  29     my $str = 'attachment; filename="=?UTF-8?B?0LzQvtC5X9GE0LDQudC7LmJpbg==?="; some_prop="value"';
  30     is(
  31         RT::I18N::DecodeMIMEWordsToUTF8($str),
  32         'attachment; filename="мой_файл.bin"; some_prop="value"',
  33         "right decoding"
  34     );
  35 }
  36
  37 diag q{regression test for #5248 from rt3.fsck.com};
  38 {
  39     my $str = qq{Subject: =?ISO-8859-1?Q?Re=3A_=5BXXXXXX=23269=5D_=5BComment=5D_Frag?=}
  40         . qq{\n =?ISO-8859-1?Q?e_zu_XXXXXX--xxxxxx_/_Xxxxx=FCxxxxxxxxxx?=};
  41     is(
  42         RT::I18N::DecodeMIMEWordsToUTF8($str),
  43         qq{Subject: Re: [XXXXXX#269] [Comment] Frage zu XXXXXX--xxxxxx / Xxxxxüxxxxxxxxxx},
  44         "right decoding"
  45     );
  46 }
  47
  48 diag q{newline and encoded file name};
  49 {
  50     my $str = qq{application/vnd.ms-powerpoint;\n\tname="=?ISO-8859-1?Q?Main_presentation.ppt?="};
  51     is(
  52         RT::I18N::DecodeMIMEWordsToUTF8($str),
  53         qq{application/vnd.ms-powerpoint;\tname="Main presentation.ppt"},
  54         "right decoding"
  55     );
  56 }
  57
  58 diag q{rfc2231};
  59 {
  60     my $str =
  61 "attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74";
  62     is(
  63         RT::I18N::DecodeMIMEWordsToEncoding( $str, 'utf-8', 'Content-Disposition' ),
  64         'attachment; filename="tést.txt"',
  65         'right decoding'
  66     );
  67 }
  68
  69 diag q{rfc2231 param continuations};
  70 {
  71     # XXX TODO: test various forms of the continuation stuff
  72     #       quotes around the values
  73     my $hdr = <<'.';
  74 inline;
  75  filename*0*=ISO-2022-JP'ja'%1b$B%3f7$7$$%25F%25%2d%259%25H%1b%28B;
  76  filename*1*=%20;
  77  filename*2*=%1b$B%25I%25%2d%25e%25a%25s%25H%1b%28B;
  78  filename*3=.txt
  79 .
  80     is(
  81         RT::I18N::DecodeMIMEWordsToEncoding( $hdr, 'utf-8', 'Content-Disposition' ),
  82         'inline; filename="新しいテキスト ドキュメント.txt"',
  83         'decoded continuations as one string'
  84     );
  85 }
  86
  87 diag q{canonicalize mime word encodings like gb2312};
  88 {
  89     my $str = qq{Subject: =?gb2312?B?1NrKwL3nuPe12Lmy09CzrN9eX1NpbXBsaWZpZWRfQ05fR0IyMzEyYQ==?=
  90         =?gb2312?B?dHRhY2hlbWVudCB0ZXN0IGluIENOIHNpbXBsaWZpZWQ=?=};
  91
  92     is(
  93         RT::I18N::DecodeMIMEWordsToUTF8($str),
  94         qq{Subject: 在世界各地共有超過_Simplified_CN_GB2312attachement test in CN simplified},
  95         "right decoding"
  96     );
  97 }
  98
  99
 100 diag q{Whitespace between encoded words should be removed};
 101 {
 102     my $str = "=?utf-8?Q?=E3=82=AD?=    =?utf-8?Q?=E3=83=A3?=";
 103     is(
 104         RT::I18N::DecodeMIMEWordsToUTF8($str),
 105         "キャ",
 106         "whitespace between encoded words is removed",
 107     );
 108
 109     $str = "=?utf-8?Q?=E3=82=AD?=  \n   =?utf-8?Q?=E3=83=A3?=";
 110     is(
 111         RT::I18N::DecodeMIMEWordsToUTF8($str),
 112         "キャ",
 113         "newlines between encoded words also removed",
 114     );
 115 }
 116
 117 diag q{Multiple octets split across QP hunks are correctly reassembled};
 118 {
 119     # This passes even without explicit code to handle it because utf8
 120     # is perl's internal string encoding.
 121     my $str = "=?utf-8?Q?=E3?=    =?utf-8?Q?=82?=    =?utf-8?Q?=AD?=";
 122     is(
 123         RT::I18N::DecodeMIMEWordsToUTF8($str),
 124         "キ",
 125         "UTF8 character split in three is successfully reassembled",
 126     );
 127
 128     # Non-utf8 encodings thus also must be checked
 129     $str = <<EOT; chomp $str;
 130 =?gb2312?q?Chinese(gb2312)=20=20=C3=C0=B9=FA=C7=B0=CB=BE=B7=A8=B2=BF=B3?=
 131  =?gb2312?q?=A4=C3=E6=BC=FB=C8=F8=B4=EF=C4=B7=BA=F3=B3=C6=C6=E4=D7=B4=CC=AC?=
 132  =?gb2312?q?=BA=DC=BA=C3=20=20Chinese=20(gb2312)?=
 133 EOT
 134     is(
 135         RT::I18N::DecodeMIMEWordsToUTF8($str),
 136         "Chinese(gb2312)  美国前司法部长面见萨达姆后称其状态很好  Chinese (gb2312)",
 137         "gb2312 character is successfully reassembled",
 138     );
 139
 140 }