rt 4.2.14 (#13852)
[freeside.git] / rt / t / mail / mime_decoding.t
1 use strict;
2 use warnings;
3 use RT::Test nodb => 1, tests => undef;
4 use Test::LongString;
5 use Test::Warn;
6
# Make sure the module whose decoders are exercised below can be loaded.
use_ok 'RT::I18N';
8
# A plain key="value" parameter (which itself contains '=') comes before the
# parameter holding the encoded-word.  The plain part must come through
# unchanged while the encoded part is decoded.
diag q{'=' char in a leading part before an encoded part};
{
    my $encoded  = 'key="plain"; key="=?UTF-8?B?0LzQvtC5X9GE0LDQudC7LmJpbg==?="';
    my $expected = 'key="plain"; key="мой_файл.bin"';

    # Called without a field name: the value is still decoded, and a single
    # "called without field name" warning is expected.
    warnings_like {
        is(
            RT::I18N::DecodeMIMEWordsToUTF8($encoded),
            $expected,
            "right decoding without a field name"
        );
    } [qr/DecodeMIMEWordsTo.*?called without field name/i],
        "warns when called without a field name";

    # Called with the header field named: same decoded result.
    is(
        RT::I18N::DecodeMIMEWordsToUTF8($encoded, 'content-disposition'),
        $expected,
        "right decoding as content-disposition"
    );
}
25
# An encoded-word inside a quoted filename parameter.  Per the diag message
# this is not standards-compliant, but mail clients send it, so it has to
# decode.
diag q{not compliant with standards, but MUAs send such field when attachment has non-ascii in name};
{
    my $encoded  = 'attachment; filename="=?UTF-8?B?0LzQvtC5X9GE0LDQudC7LmJpbg==?="';
    my $expected = 'attachment; filename="мой_файл.bin"';

    # Called without a field name: the value is still decoded, and a single
    # "called without field name" warning is expected.
    warnings_like {
        is(
            RT::I18N::DecodeMIMEWordsToUTF8($encoded),
            $expected,
            "right decoding without a field name"
        );
    } [qr/DecodeMIMEWordsTo.*?called without field name/i],
        "warns when called without a field name";

    # Called with the header field named: same decoded result.
    is(
        RT::I18N::DecodeMIMEWordsToUTF8($encoded, 'content-disposition'),
        $expected,
        "right decoding as content-disposition"
    );
}
42
# A plain some_prop="value" parameter (containing '=') follows the encoded
# filename.  The trailing part must come through unchanged.
diag q{'=' char in a trailing part after an encoded part};
{
    my $encoded  = 'attachment; filename="=?UTF-8?B?0LzQvtC5X9GE0LDQudC7LmJpbg==?="; some_prop="value"';
    my $expected = 'attachment; filename="мой_файл.bin"; some_prop="value"';

    # Called without a field name: the value is still decoded, and a single
    # "called without field name" warning is expected.
    warnings_like {
        is(
            RT::I18N::DecodeMIMEWordsToUTF8($encoded),
            $expected,
            "right decoding without a field name"
        );
    } [qr/DecodeMIMEWordsTo.*?called without field name/i],
        "warns when called without a field name";

    # Called with the header field named: same decoded result.
    is(
        RT::I18N::DecodeMIMEWordsToUTF8($encoded, 'content-disposition'),
        $expected,
        "right decoding as content-disposition"
    );
}
59
diag("adding quotes around mime words containing specials when word is already quoted");
{
    # Two Q-encoded words separated by a newline, inside one quoted value;
    # the first decodes to text containing a comma.
    my $folded = join "\n",
        'attachment; filename="=?iso-8859-1?Q?foobar,_?=',
        '=?iso-8859-1?Q?barfoo.docx?="';

    is(
        RT::I18N::DecodeMIMEWordsToUTF8( $folded, 'content-disposition' ),
        'attachment; filename="foobar, barfoo.docx"',
        "No added quotes",
    );
}
66
diag("regression test for #5248 from rt3.fsck.com");
{
    # The subject is folded over two lines; the second line starts with a
    # space and continues the word begun on the first ("Frag" + "e").
    my @header_lines = (
        q{Subject: =?ISO-8859-1?Q?Re=3A_=5BXXXXXX=23269=5D_=5BComment=5D_Frag?=},
        q{ =?ISO-8859-1?Q?e_zu_XXXXXX--xxxxxx_/_Xxxxx=FCxxxxxxxxxx?=},
    );
    my $expected = q{Subject: Re: [XXXXXX#269] [Comment] Frage zu XXXXXX--xxxxxx / Xxxxxüxxxxxxxxxx};

    is(
        RT::I18N::DecodeMIMEWordsToUTF8( join( "\n", @header_lines ), 'Subject' ),
        $expected,
        "right decoding",
    );
}
77
# A Content-Type value folded with newline + tab before an encoded name.
# The two calls have different expectations, so each assertion is named for
# the call it checks.
diag q{newline and encoded file name};
{
    my $encoded = qq{application/vnd.ms-powerpoint;\n\tname="=?ISO-8859-1?Q?Main_presentation.ppt?="};

    # Without a field name: the newline is dropped but the tab is kept, and
    # a single "called without field name" warning is expected.
    warnings_like {
        is(
            RT::I18N::DecodeMIMEWordsToUTF8($encoded),
            qq{application/vnd.ms-powerpoint;\tname="Main presentation.ppt"},
            "right decoding without a field name"
        );
    } [qr/DecodeMIMEWordsTo.*?called without field name/i],
        "warns when called without a field name";

    # As content-type: the newline + tab fold becomes one space.
    is(
        RT::I18N::DecodeMIMEWordsToUTF8($encoded, 'content-type'),
        qq{application/vnd.ms-powerpoint; name="Main presentation.ppt"},
        "right decoding as content-type"
    );
}
94
diag("rfc2231");
{
    # filename*= carries a charset and percent-encoded octets.
    my $encoded  = q{attachment; filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74};
    my $expected = 'attachment; filename="tést.txt"';

    is(
        RT::I18N::DecodeMIMEWordsToEncoding( $encoded, 'utf-8', 'Content-Disposition' ),
        $expected,
        'right decoding',
    );
}
105
diag("rfc2231 param continuations");
{
    # XXX TODO: test various forms of the continuation stuff
    #       quotes around the values

    # Non-interpolating heredoc: the payload contains '$' characters that
    # must reach the decoder literally.
    my $folded_header = <<'END_OF_HEADER';
inline;
 filename*0*=ISO-2022-JP'ja'%1b$B%3f7$7$$%25F%25%2d%259%25H%1b%28B;
 filename*1*=%20;
 filename*2*=%1b$B%25I%25%2d%25e%25a%25s%25H%1b%28B;
 filename*3=.txt
END_OF_HEADER
    my $expected = 'inline; filename="新しいテキスト ドキュメント.txt"';

    # The four filename*N segments are expected to come back as one value.
    is(
        RT::I18N::DecodeMIMEWordsToEncoding( $folded_header, 'utf-8', 'Content-Disposition' ),
        $expected,
        'decoded continuations as one string',
    );
}
123
diag("canonicalize mime word encodings like gb2312");
{
    # Two gb2312 base64 words, the second on a tab-indented continuation line.
    my $first_line  = q{Subject: =?gb2312?B?1NrKwL3nuPe12Lmy09CzrN9eX1NpbXBsaWZpZWRfQ05fR0IyMzEyYQ==?=};
    my $second_line = qq{\t=?gb2312?B?dHRhY2hlbWVudCB0ZXN0IGluIENOIHNpbXBsaWZpZWQ=?=};
    my $expected    = q{Subject: 在世界各地共有超過_Simplified_CN_GB2312attachement test in CN simplified};

    is(
        RT::I18N::DecodeMIMEWordsToUTF8( $first_line . "\n" . $second_line, "Subject" ),
        $expected,
        "right decoding",
    );
}
135
diag("Whitespace between encoded words should be removed");
{
    # [ raw value, test description ] -- every case decodes to the same text.
    my @cases = (
        [
            "=?utf-8?Q?=E3=82=AD?=    =?utf-8?Q?=E3=83=A3?=",
            "whitespace between encoded words is removed",
        ],
        [
            "=?utf-8?Q?=E3=82=AD?=  \n   =?utf-8?Q?=E3=83=A3?=",
            "newlines between encoded words also removed",
        ],
    );
    my $no_field_warning = qr/DecodeMIMEWordsTo.*?called without field name/i;

    # No field name is passed, so one warning per decode call is expected.
    warnings_like {
        for my $case (@cases) {
            my ( $raw, $label ) = @$case;
            is( RT::I18N::DecodeMIMEWordsToUTF8($raw), "キャ", $label );
        }
    } [ ($no_field_warning) x scalar(@cases) ];
}
154
diag("Multiple octets split across QP hunks are correctly reassembled");
{
    # One three-octet UTF-8 character, one octet per encoded-word.
    my $split_utf8 = "=?utf-8?Q?=E3?=    =?utf-8?Q?=82?=    =?utf-8?Q?=AD?=";

    # gb2312 text folded over three lines; continuation lines start with a
    # space and the value has no trailing newline.
    my $split_gb2312 = join "\n",
        '=?gb2312?q?Chinese(gb2312)=20=20=C3=C0=B9=FA=C7=B0=CB=BE=B7=A8=B2=BF=B3?=',
        ' =?gb2312?q?=A4=C3=E6=BC=FB=C8=F8=B4=EF=C4=B7=BA=F3=B3=C6=C6=E4=D7=B4=CC=AC?=',
        ' =?gb2312?q?=BA=DC=BA=C3=20=20Chinese=20(gb2312)?=';

    my $no_field_warning = qr/DecodeMIMEWordsTo.*?called without field name/i;

    # Neither call names a field, so two warnings are expected in total.
    warnings_like {
        # The utf8 case passes even without explicit handling, because utf8
        # is perl's internal string encoding.
        is(
            RT::I18N::DecodeMIMEWordsToUTF8($split_utf8),
            "キ",
            "UTF8 character split in three is successfully reassembled",
        );

        # Hence a non-utf8 encoding has to be checked as well.
        is(
            RT::I18N::DecodeMIMEWordsToUTF8($split_gb2312),
            "Chinese(gb2312)  美国前司法部长面见萨达姆后称其状态很好  Chinese (gb2312)",
            "gb2312 character is successfully reassembled",
        );
    } [ $no_field_warning, $no_field_warning ];
}
180
diag("multiple mime words containing special chars already in quotes");
{
    # Both encodings of the same filename must decode to this value.
    my $expected = q{attachment; filename="2.ニュースリリース.pdf"};

    # [ test description, raw header value ]
    my @cases = (
        [
            "base64",
            q{attachment; filename="=?ISO-2022-JP?B?Mi4bJEIlSyVlITwlOSVqJWohPCU5GyhC?= =?ISO-2022-JP?B?LnBkZg==?="},
        ],
        [
            "QP",
            q{attachment; filename="=?UTF-8?Q?2=2E=E3=83=8B=E3=83=A5=E3=83=BC=E3=82=B9=E3=83=AA=E3=83=AA?= =?UTF-8?Q?=E3=83=BC=E3=82=B9=2Epdf?="},
        ],
    );

    for my $case (@cases) {
        my ( $label, $raw ) = @$case;
        is_string(
            RT::I18N::DecodeMIMEWordsToUTF8( $raw, 'Content-Disposition' ),
            $expected,
            $label,
        );
    }
}
197
# An encoded-word followed by plain text inside one quoted filename value.
# Each case has its own description so a failing one can be identified from
# the TAP output alone.
diag "mime word combined with text in quoted filename property";
{
    my $plain_name  = q{attachment; filename="Ceci n'est pas une pipe.pdf"};
    my $dotted_name = q{attachment; filename="Ceci n'est pas une... pipe.pdf"};

    # [ raw header value, expected decoding, test description ]
    my @cases = (
        [
            q{attachment; filename="=?UTF-8?B?Q2VjaSBuJ2VzdCBwYXMgdW5l?= pipe.pdf"},
            $plain_name,
            "base64",
        ],
        [
            q{attachment; filename="=?UTF-8?B?Q2VjaSBuJ2VzdCBwYXMgdW5lLi4u?= pipe.pdf"},
            $dotted_name,
            "base64 with trailing dots",
        ],
        [
            q{attachment; filename="=?UTF-8?Q?Ceci n'est pas une?= pipe.pdf"},
            $plain_name,
            "QP",
        ],
        [
            q{attachment; filename="=?UTF-8?Q?Ceci n'est pas une...?= pipe.pdf"},
            $dotted_name,
            "QP with trailing dots",
        ],
    );

    for my $case (@cases) {
        my ( $raw, $expected, $label ) = @$case;
        is_string(
            RT::I18N::DecodeMIMEWordsToUTF8( $raw, 'Content-Disposition' ),
            $expected,
            $label
        );
    }
}
228
diag("quotes in filename");
{
    my $encoded = q{attachment; filename="=?UTF-8?B?YSAicXVvdGVkIiBmaWxl?="};

    # q{} keeps the backslashes literal: the expected value has each inner
    # double quote preceded by a backslash.
    my $expected = q{attachment; filename="a \"quoted\" file"};

    is_string(
        RT::I18N::DecodeMIMEWordsToUTF8( $encoded, 'Content-Disposition' ),
        $expected,
        "quoted filename correctly decoded",
    );
}
238
diag("Alternating encoded-words and not, space is preserved");
{
    # Plain words A, C, E, G alternate with encoded words B, D, F.
    my $mixed    = q{A =?UTF-8?Q?B?= C =?UTF-8?Q?D?= E =?UTF-8?Q?F?= G};
    my $expected = q{A B C D E F G};

    is_string(
        RT::I18N::DecodeMIMEWordsToUTF8( $mixed, "Subject" ),
        $expected,
        "Space is preserved between encoded-words and not",
    );
}
248
# The plan was left open (tests => undef), so close it explicitly.
done_testing();