1 # BEGIN BPS TAGGED BLOCK {{{
5 # This software is Copyright (c) 1996-2013 Best Practical Solutions, LLC
6 # <sales@bestpractical.com>
8 # (Except where explicitly superseded by other copyright notices)
13 # This work is made available to you under the terms of Version 2 of
14 # the GNU General Public License. A copy of that license should have
15 # been provided with this software, but in any event can be snarfed
18 # This work is distributed in the hope that it will be useful, but
19 # WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # General Public License for more details.
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 # 02110-1301 or visit their web page on the internet at
27 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
30 # CONTRIBUTION SUBMISSION POLICY:
32 # (The following paragraph is not intended to limit the rights granted
33 # to you to modify and distribute this software under the terms of
34 # the GNU General Public License and is only of importance to you if
35 # you choose to contribute your changes and enhancements to the
36 # community by submitting them to Best Practical Solutions, LLC.)
38 # By intentionally submitting any modifications, corrections or
39 # derivatives to this work, or any other work intended for use with
40 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
41 # you are the copyright holder for those contributions and you grant
42 # Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
43 # royalty-free, perpetual, license to use, copy, create derivative
44 # works based on those contributions, and sublicense and distribute
45 # those contributions and any derivatives thereof.
47 # END BPS TAGGED BLOCK }}}
49 package RT::EmailParser;
52 use base qw/RT::Base/;
62 use File::Temp qw/tempdir/;
66 RT::EmailParser - helper functions for parsing parts from incoming
81 Returns a new RT::EmailParser object
87 my $class = ref($proto) || $proto;
89 bless ($self, $class);
94 =head2 SmartParseMIMEEntityFromScalar Message => SCALAR_REF [, Decode => BOOL, Exact => BOOL ] }
96 Parse a message stored in a scalar from scalar_ref.
100 sub SmartParseMIMEEntityFromScalar {
102 my %args = ( Message => undef, Decode => 1, Exact => 0, @_ );
105 my ( $fh, $temp_file );
108 # on NFS and NTFS, it is possible that tempfile() conflicts
109 # with other processes, causing a race condition. we try to
110 # accommodate this by pausing and retrying.
112 if ( $fh, $temp_file ) =
113 eval { File::Temp::tempfile( undef, UNLINK => 0 ) };
121 print $fh $args{'Message'};
123 if ( -f $temp_file ) {
125 # We have to trust the temp file's name -- untaint it
126 $temp_file =~ /(.*)/;
127 my $entity = $self->ParseMIMEEntityFromFile( $1, $args{'Decode'}, $args{'Exact'} );
134 #If for some reason we weren't able to parse the message using a temp file
135 # try it with a scalar
136 if ( $@ || !$self->Entity ) {
137 return $self->ParseMIMEEntityFromScalar( $args{'Message'}, $args{'Decode'}, $args{'Exact'} );
143 =head2 ParseMIMEEntityFromSTDIN
145 Parse a message from standard input
149 sub ParseMIMEEntityFromSTDIN {
151 return $self->ParseMIMEEntityFromFileHandle(\*STDIN, @_);
154 =head2 ParseMIMEEntityFromScalar $message
156 Takes either a scalar or a reference to a scalar which contains a stringified MIME message.
159 Returns true if it wins.
160 Returns false if it loses.
164 sub ParseMIMEEntityFromScalar {
166 return $self->_ParseMIMEEntity( shift, 'parse_data', @_ );
169 =head2 ParseMIMEEntityFromFilehandle *FH
171 Parses a mime entity from a filehandle passed in as an argument
175 sub ParseMIMEEntityFromFileHandle {
177 return $self->_ParseMIMEEntity( shift, 'parse', @_ );
180 =head2 ParseMIMEEntityFromFile
182 Parses a mime entity from a filename passed in as an argument
186 sub ParseMIMEEntityFromFile {
188 return $self->_ParseMIMEEntity( shift, 'parse_open', @_ );
192 sub _ParseMIMEEntity {
196 my $postprocess = (@_ ? shift : 1);
199 # Create a new parser object:
200 my $parser = MIME::Parser->new();
201 $self->_SetupMIMEParser($parser);
202 $parser->decode_bodies(0) if $exact;
204 # TODO: XXX 3.0 we really need to wrap this in an eval { }
205 unless ( $self->{'entity'} = $parser->$method($message) ) {
206 $RT::Logger->crit("Couldn't parse MIME stream and extract the submessages");
207 # Try again, this time without extracting nested messages
208 $parser->extract_nested_messages(0);
209 unless ( $self->{'entity'} = $parser->$method($message) ) {
210 $RT::Logger->crit("couldn't parse MIME stream");
215 $self->_PostProcessNewEntity if $postprocess;
217 return $self->{'entity'};
222 return unless $self->{'entity'};
224 my @parts = $self->{'entity'}->parts_DFS;
225 $self->_DecodeBody($_) foreach @parts;
232 my $old = $entity->bodyhandle or return;
233 return unless $old->is_encoded;
235 require MIME::Decoder;
236 my $encoding = $entity->head->mime_encoding;
237 my $decoder = MIME::Decoder->new($encoding);
238 unless ( $decoder ) {
239 $RT::Logger->error("Couldn't find decoder for '$encoding', switching to binary");
245 # XXX: use InCore for now, but later must switch to files
246 my $new = MIME::Body::InCore->new();
250 my $source = $old->open('r') or die "couldn't open body: $!";
251 my $destination = $new->open('w') or die "couldn't open body: $!";
254 eval { $decoder->decode($source, $destination) };
255 $RT::Logger->error($@) if $@;
257 $source->close or die "can't close: $!";
258 $destination->close or die "can't close: $!";
260 $entity->bodyhandle( $new );
263 =head2 _PostProcessNewEntity
265 cleans up and postprocesses a newly parsed MIME Entity
269 sub _PostProcessNewEntity {
272 #Now we've got a parsed mime object.
274 # Unfold headers that are have embedded newlines
275 # Better do this before conversion or it will break
276 # with multiline encoded Subject (RFC2047) (fsck.com #5594)
279 # try to convert text parts into utf-8 charset
280 RT::I18N::SetMIMEEntityToEncoding($self->{'entity'}, 'utf-8');
283 =head2 ParseCcAddressesFromHead HASHREF
285 Takes a hashref object containing QueueObj, Head and CurrentUser objects.
286 Returns a list of all email addresses in the To and Cc
287 headers b<except> the current Queue's email addresses, the CurrentUser's
288 email address and anything that the RT->Config->Get('RTAddressRegexp') matches.
292 sub ParseCcAddressesFromHead {
296 CurrentUser => undef,
302 my @ToObjs = Email::Address->parse( $self->Head->get('To') );
303 my @CcObjs = Email::Address->parse( $self->Head->get('Cc') );
305 foreach my $AddrObj ( @ToObjs, @CcObjs ) {
306 my $Address = $AddrObj->address;
307 my $user = RT::User->new(RT->SystemUser);
308 $Address = $user->CanonicalizeEmailAddress($Address);
309 next if lc $args{'CurrentUser'}->EmailAddress eq lc $Address;
310 next if $self->IsRTAddress($Address);
312 push ( @Addresses, $Address );
318 =head2 IsRTaddress ADDRESS
320 Takes a single parameter, an email address.
321 Returns true if that address matches the C<RTAddressRegexp> config option.
322 Returns false, otherwise.
331 if ( my $address_re = RT->Config->Get('RTAddressRegexp') ) {
332 return $address =~ /$address_re/i ? 1 : undef;
335 # we don't warn here, but do in config check
336 if ( my $correspond_address = RT->Config->Get('CorrespondAddress') ) {
337 return 1 if lc $correspond_address eq lc $address;
339 if ( my $comment_address = RT->Config->Get('CommentAddress') ) {
340 return 1 if lc $comment_address eq lc $address;
343 my $queue = RT::Queue->new( RT->SystemUser );
344 $queue->LoadByCols( CorrespondAddress => $address );
345 return 1 if $queue->id;
347 $queue->LoadByCols( CommentAddress => $address );
348 return 1 if $queue->id;
354 =head2 CullRTAddresses ARRAY
356 Takes a single argument, an array of email addresses.
357 Returns the same array with any IsRTAddress()es weeded out.
362 sub CullRTAddresses {
364 my @addresses = (@_);
366 return grep { !$self->IsRTAddress($_) } @addresses;
373 # LookupExternalUserInfo is a site-definable method for synchronizing
374 # incoming users with an external data source.
376 # This routine takes a tuple of EmailAddress and FriendlyName
377 # EmailAddress is the user's email address, ususally taken from
378 # an email message's From: header.
379 # FriendlyName is a freeform string, ususally taken from the "comment"
380 # portion of an email message's From: header.
382 # If you define an AutoRejectRequest template, RT will use this
383 # template for the rejection message.
386 =head2 LookupExternalUserInfo
388 LookupExternalUserInfo is a site-definable method for synchronizing
389 incoming users with an external data source.
391 This routine takes a tuple of EmailAddress and FriendlyName
392 EmailAddress is the user's email address, ususally taken from
393 an email message's From: header.
394 FriendlyName is a freeform string, ususally taken from the "comment"
395 portion of an email message's From: header.
397 It returns (FoundInExternalDatabase, ParamHash);
399 FoundInExternalDatabase must be set to 1 before return if the user
400 was found in the external database.
402 ParamHash is a Perl parameter hash which can contain at least the
403 following fields. These fields are used to populate RT's users
404 database when the user is created.
406 EmailAddress is the email address that RT should use for this user.
407 Name is the 'Name' attribute RT should use for this user.
408 'Name' is used for things like access control and user lookups.
409 RealName is what RT should display as the user's name when displaying
414 sub LookupExternalUserInfo {
416 my $EmailAddress = shift;
417 my $RealName = shift;
419 my $FoundInExternalDatabase = 1;
422 #Name is the RT username you want to use for this user.
423 $params{'Name'} = $EmailAddress;
424 $params{'EmailAddress'} = $EmailAddress;
425 $params{'RealName'} = $RealName;
427 return ($FoundInExternalDatabase, %params);
432 Return the parsed head from this message
438 return $self->Entity->head;
443 Return the parsed Entity from this message
449 return $self->{'entity'};
454 =head2 _SetupMIMEParser $parser
456 A private instance method which sets up a mime parser to do its job
461 ## TODO: Does it make sense storing to disk at all? After all, we
462 ## need to put each msg as an in-core scalar before saving it to
463 ## the database, don't we?
465 ## At the same time, we should make sure that we nuke attachments
466 ## Over max size and return them
468 sub _SetupMIMEParser {
472 # Set up output directory for files; we use $RT::VarPath instead
473 # of File::Spec->tmpdir (e.g., /tmp) beacuse it isn't always
476 if ( -w $RT::VarPath ) {
477 $tmpdir = File::Temp::tempdir( DIR => $RT::VarPath, CLEANUP => 1 );
478 } elsif (-w File::Spec->tmpdir) {
479 $tmpdir = File::Temp::tempdir( TMPDIR => 1, CLEANUP => 1 );
481 $RT::Logger->crit("Neither the RT var directory ($RT::VarPath) nor the system tmpdir (@{[File::Spec->tmpdir]}) are writable; falling back to in-memory parsing!");
484 #If someone includes a message, extract it
485 $parser->extract_nested_messages(1);
486 $parser->extract_uuencode(1); ### default is false
489 # If we got a writable tmpdir, write to disk
490 push ( @{ $self->{'AttachmentDirs'} ||= [] }, $tmpdir );
491 $parser->output_dir($tmpdir);
492 $parser->filer->ignore_filename(1);
494 # Set up the prefix for files with auto-generated names:
495 $parser->output_prefix("part");
497 # From the MIME::Parser docs:
498 # "Normally, tmpfiles are created when needed during parsing, and destroyed automatically when they go out of scope"
499 # Turns out that the default is to recycle tempfiles
500 # Temp files should never be recycled, especially when running under perl taint checking
502 $parser->tmp_recycling(0) if $parser->can('tmp_recycling');
504 # Otherwise, fall back to storing it in memory
505 $parser->output_to_core(1);
506 $parser->tmp_to_core(1);
507 $parser->use_inner_files(1);
512 =head2 ParseEmailAddress string
514 Returns a list of Email::Address objects
515 Works around the bug that Email::Address 1.889 and earlier
516 doesn't handle local-only email addresses (when users pass
517 in just usernames on the RT system in fields that expect
520 We don't handle the case of
521 bob, fred@bestpractical.com
522 because we don't want to fail parsing
523 bob, "Falcone, Fred" <fred@bestpractical.com>
524 The next release of Email::Address will have a new method
525 we can use that removes the bandaid
529 sub ParseEmailAddress {
531 my $address_string = shift;
533 $address_string =~ s/^\s+|\s+$//g;
536 # if it looks like a username / local only email
537 if ($address_string !~ /@/ && $address_string =~ /^\w+$/) {
538 my $user = RT::User->new( RT->SystemUser );
539 my ($id, $msg) = $user->Load($address_string);
541 push @addresses, Email::Address->new($user->Name,$user->EmailAddress);
543 $RT::Logger->error("Unable to parse an email address from $address_string: $msg");
546 @addresses = Email::Address->parse($address_string);
555 Outlook 2007/2010 have a bug when you write an email with the html format.
556 it will send a 'multipart/alternative' with both 'text/plain' and 'text/html'
557 in it. it's cool to have a 'text/plain' part, but the problem is the part is
558 not so right: all the "\n" in your main message will become "\n\n" :/
560 this method will fix this bug, i.e. replaces "\n\n" to "\n".
561 return 1 if it does find the problem in the entity and get it fixed.
568 my $mime = $self->Entity();
569 return unless $mime && $self->LooksLikeMSEmail($mime);
572 if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
573 my $first = $mime->parts(0);
574 if ( $first && $first->head->get('Content-Type') =~ m{multipart/alternative} )
576 my $inner_first = $first->parts(0);
577 if ( $inner_first && $inner_first->head->get('Content-Type') =~ m{text/plain} )
579 $text_part = $inner_first;
583 elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
584 my $first = $mime->parts(0);
585 if ( $first && $first->head->get('Content-Type') =~ m{text/plain} ) {
590 # Add base64 since we've seen examples of double newlines with
591 # this type too. Need an example of a multi-part base64 to
592 # handle that permutation if it exists.
593 elsif ( $mime->head->get('Content-Transfer-Encoding') =~ m{base64} ) {
594 $text_part = $mime; # Assuming single part, already decoded.
599 # use the unencoded string
600 my $content = $text_part->bodyhandle->as_string;
601 if ( $content =~ s/\n\n/\n/g ) {
603 # Outlook puts a space on extra newlines, remove it
604 $content =~ s/\ +$//mg;
606 # only write only if we did change the content
607 if ( my $io = $text_part->open("w") ) {
608 $io->print($content);
611 "Removed extra newlines from MS Outlook message.");
615 $RT::Logger->error("Can't write to body to fix newlines");
623 =head1 LooksLikeMSEmail
625 Try to determine if the current email may have
626 come from MS Outlook or gone through Exchange, and therefore
627 may have extra newlines added.
631 sub LooksLikeMSEmail {
635 my $mailer = $mime->head->get('X-Mailer');
637 # 12.0 is outlook 2007, 14.0 is 2010
638 return 1 if ( $mailer && $mailer =~ /Microsoft(?:.*?)Outlook 1[2-4]\./ );
640 if ( RT->Config->Get('CheckMoreMSMailHeaders') ) {
642 # Check for additional headers that might
643 # indicate this came from Outlook or through Exchange.
644 # A sample we received had the headers X-MS-Has-Attach: and
645 # X-MS-Tnef-Correlator: and both had no value.
647 my @tags = $mime->head->tags();
648 return 1 if grep { /^X-MS-/ } @tags;
651 return 0; # Doesn't look like MS email.
656 File::Path::rmtree([@{$self->{'AttachmentDirs'}}],0,1)
657 if $self->{'AttachmentDirs'};
662 RT::Base->_ImportOverlays();