-# BEGIN LICENSE BLOCK
+# BEGIN BPS TAGGED BLOCK {{{
#
-# Copyright (c) 1996-2003 Jesse Vincent <jesse@bestpractical.com>
+# COPYRIGHT:
+#
+# This software is Copyright (c) 1996-2007 Best Practical Solutions, LLC
+# <jesse@bestpractical.com>
#
-# (Except where explictly superceded by other copyright notices)
+# (Except where explicitly superseded by other copyright notices)
+#
+#
+# LICENSE:
#
# This work is made available to you under the terms of Version 2 of
# the GNU General Public License. A copy of that license should have
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
-# Unless otherwise specified, all modifications, corrections or
-# extensions to this work which alter its source code become the
-# property of Best Practical Solutions, LLC when submitted for
-# inclusion in the work.
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 or visit their web page on the internet at
+# http://www.gnu.org/copyleft/gpl.html.
+#
+#
+# CONTRIBUTION SUBMISSION POLICY:
#
+# (The following paragraph is not intended to limit the rights granted
+# to you to modify and distribute this software under the terms of
+# the GNU General Public License and is only of importance to you if
+# you choose to contribute your changes and enhancements to the
+# community by submitting them to Best Practical Solutions, LLC.)
#
-# END LICENSE BLOCK
+# By intentionally submitting any modifications, corrections or
+# derivatives to this work, or any other work intended for use with
+# Request Tracker, to Best Practical Solutions, LLC, you confirm that
+# you are the copyright holder for those contributions and you grant
+# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
+# royalty-free, perpetual, license to use, copy, create derivative
+# works based on those contributions, and sublicense and distribute
+# those contributions and any derivatives thereof.
+#
+# END BPS TAGGED BLOCK }}}
=head1 NAME
RT::I18N - a base class for localization of RT
package RT::I18N;
use strict;
+use warnings;
+
use Locale::Maketext 1.04;
use Locale::Maketext::Lexicon 0.25;
use base ('Locale::Maketext::Fuzzy');
-use vars qw( %Lexicon );
-#If we're running on 5.6, we desperately need Encode::compat. But if we're on 5.8, we don't really need it.
-BEGIN { if ($] < 5.007001) {
-require Encode::compat;
-} }
use Encode;
-
use MIME::Entity;
use MIME::Head;
# I decree that this project's first language is English.
-%Lexicon = (
+our %Lexicon = (
'TEST_STRING' => 'Concrete Mixer',
'__Content-Type' => 'text/plain; charset=utf-8',
=cut
sub Init {
+ require File::Glob;
+
# Load language-specific functions
- foreach my $language ( glob(substr(__FILE__, 0, -3) . "/*.pm")) {
- if ($language =~ /^([-\w.\/\\~:]+)$/) {
+ foreach my $language ( File::Glob::bsd_glob(substr(__FILE__, 0, -3) . "/*.pm")) {
+ if ($language =~ /^([-\w\s.\/\\~:]+)$/) {
require $1;
}
else {
$_ => [
Gettext => (substr(__FILE__, 0, -3) . "/$_.po"),
Gettext => "$RT::LocalLexiconPath/*/$_.po",
+ Gettext => "$RT::LocalLexiconPath/$_.po",
],
} @lang
});
=head2 SetMIMEEntityToUTF8 $entity
-An utility method which will try to convert entity body into utf8.
+An utility function which will try to convert entity body into utf8.
It's now a wrap-up of SetMIMEEntityToEncoding($entity, 'utf-8').
=cut
# }}}
+# {{{ IsTextualContentType
+
+=head2 IsTextualContentType $type
+
+An utility function that determines whether $type is I<textual>, meaning
+that it can sensibly be converted to Unicode text.
+
+Currently, it returns true iff $type matches this regular expression
+(case-insensitively):
+
+ ^(?:text/(?:plain|html)|message/rfc822)\b
+
+# }}}
+
+=cut
+
+sub IsTextualContentType {
+ my $type = shift;
+ ($type =~ m{^(?:text/(?:plain|html)|message/rfc822)\b}i) ? 1 : 0;
+}
+
# {{{ SetMIMEEntityToEncoding
=head2 SetMIMEEntityToEncoding $entity, $encoding
-An utility method which will try to convert entity body into specified
+An utility function which will try to convert entity body into specified
charset encoding (encoded as octets, *not* unicode-strings). It will
iterate all the entities in $entity, and try to convert each one into
specified charset if whose Content-Type is 'text/plain'.
-This method doesn't return anything meaningful.
+This function doesn't return anything meaningful.
=cut
sub SetMIMEEntityToEncoding {
my ( $entity, $enc, $preserve_words ) = ( shift, shift, shift );
- #if ( $entity->is_multipart ) {
- #$RT::Logger->crit("This entity is a multipart " . $entity->head->as_string);
- SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
- #}
+ # do the same for parts first of all
+ SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts;
my $charset = _FindOrGuessCharset($entity) or return;
# one and only normalization
- $charset = 'utf-8' if $charset eq 'utf8';
- $enc = 'utf-8' if $enc eq 'utf8';
+ $charset = 'utf-8' if $charset =~ /^utf-?8$/i;
+ $enc = 'utf-8' if $enc =~ /^utf-?8$/i;
- SetMIMEHeadToEncoding($entity->head, $charset => $enc, $preserve_words);
+ SetMIMEHeadToEncoding(
+ $entity->head,
+ _FindOrGuessCharset($entity, 1) => $enc,
+ $preserve_words
+ );
my $head = $entity->head;
# If this is a textual entity, we'd need to preserve its original encoding
$head->add( "X-RT-Original-Encoding" => $charset )
- if $head->mime_attr('content-type.charset') or $head->mime_type =~ /^text/;
+ if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
-
- return unless ( $head->mime_type =~ qr{^(text/plain|message/rfc822)$}i );
-
+ return unless IsTextualContentType($head->mime_type);
my $body = $entity->bodyhandle;
# {{{ Convert the body
eval {
- $RT::Logger->debug("Converting '$charset' to '$enc' for ". $head->mime_type . " - ". $head->get('subject'));
+ $RT::Logger->debug("Converting '$charset' to '$enc' for ". $head->mime_type . " - ". ($head->get('subject') || 'Subjectless message'));
# NOTE:: see the comments at the end of the sub.
Encode::_utf8_off( $lines[$_] ) foreach ( 0 .. $#lines );
my $str = shift;
my $enc = shift;
-
- @_ = $str =~ m/([^=]*)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/g;
-
+ @_ = $str =~ m/(.*?)=\?([^?]+)\?([QqBb])\?([^?]+)\?=([^=]*)/gcs;
return ($str) unless (@_);
+ # add everything that hasn't matched to the end of the latest
+ # string in array this happen when we have 'key="=?encoded?="; key="plain"'
+ $_[-1] .= substr($str, pos $str);
+
$str = "";
while (@_) {
my ($prefix, $charset, $encoding, $enc_str, $trailing) =
- (shift, shift, shift, shift, shift);
+ (shift, shift, lc shift, shift, shift);
$trailing =~ s/\s?\t?$//; # Observed from Outlook Express
- if ($encoding eq 'Q' or $encoding eq 'q') {
+ if ( $encoding eq 'q' ) {
use MIME::QuotedPrint;
$enc_str =~ tr/_/ /; # Observed from Outlook Express
$enc_str = decode_qp($enc_str);
- } elsif ($encoding eq 'B' or $encoding eq 'b') {
+ } elsif ( $encoding eq 'b' ) {
use MIME::Base64;
$enc_str = decode_base64($enc_str);
} else {
- $RT::Logger->warning("RT::I18N::DecodeMIMEWordsToCharset got a " .
- "strange encoding: $encoding.");
+ $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
+ ."only Q(uoted-printable) and B(ase64) are supported");
}
# now we have got a decoded subject, try to convert into the encoding
}
}
+ # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
+ # We _should_ be preserving them encoded until after parsing is completed and
+ # THEN undo the mime-encoding.
+ #
+ # This routine should be translating the existing mimeencoding to utf8 but leaving
+ # things encoded.
+ #
+ # It's legal for headers to contain mime-encoded commas and semicolons which
+ # should not be treated as address separators. (Encoding == quoting here)
+ #
+ # until this is fixed, we must escape any string containing a comma or semicolon
+ # this is only a bandaid
+
+ $enc_str = qq{"$enc_str"} if ($enc_str =~ /[,;]/);
$str .= $prefix . $enc_str . $trailing;
}
+ # We might have \n without trailing whitespace, which will result in
+ # invalid headers.
+ $str =~ s/\n//g;
+
return ($str)
}
# {{{ _FindOrGuessCharset
-=head2 _FindOrGuessCharset MIME::Entity
+=head2 _FindOrGuessCharset MIME::Entity, $head_only
+
+When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that, will use Encode::Guess to try to figure it out
-When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that,
-will use Encode::Guess to try to figure it out
+If $head_only is true, only guesses charset for head parts. This is because header's encoding (e.g. filename="...") may be different from that of body's.
=cut
sub _FindOrGuessCharset {
my $entity = shift;
+ my $head_only = shift;
my $head = $entity->head;
- if ($head->mime_attr("content-type.charset")) {
- return $head->mime_attr("content-type.charset");
+ if ( my $charset = $head->mime_attr("content-type.charset") ) {
+ return $charset;
}
- if ( $head->mime_type =~ m{^text/}) {
+ if ( !$head_only and $head->mime_type =~ m{^text/}) {
my $body = $entity->bodyhandle or return;
- return _GuessCharset( $head->as_string . $body->as_string );
+ return _GuessCharset( $body->as_string );
}
else {
# potentially binary data -- don't guess the body
# }}}
-
# {{{ _GuessCharset
=head2 _GuessCharset STRING
Encode::Guess->set_suspects(@RT::EmailInputEncodings);
my $decoder = Encode::Guess->guess( $_[0] );
+ if ( defined($decoder) ) {
if ( ref $decoder ) {
$charset = $decoder->name;
$RT::Logger->debug("Guessed encoding: $charset");
else {
$RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
}
+ }
+ else {
+ $RT::Logger->warning("Encode::Guess failed: decoder is undefined; fallback to $fallback");
+ }
}
else {
$RT::Logger->warning("Cannot Encode::Guess; fallback to $fallback");
return if $charset eq $enc and $preserve_words;
foreach my $tag ( $head->tags ) {
+ next unless $tag; # seen in wild: headers with no name
my @values = $head->get_all($tag);
$head->delete($tag);
foreach my $value (@values) {