1 # BEGIN BPS TAGGED BLOCK {{{
5 # This software is Copyright (c) 1996-2017 Best Practical Solutions, LLC
6 # <sales@bestpractical.com>
8 # (Except where explicitly superseded by other copyright notices)
13 # This work is made available to you under the terms of Version 2 of
14 # the GNU General Public License. A copy of that license should have
15 # been provided with this software, but in any event can be snarfed
18 # This work is distributed in the hope that it will be useful, but
19 # WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # General Public License for more details.
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 # 02110-1301 or visit their web page on the internet at
27 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
30 # CONTRIBUTION SUBMISSION POLICY:
32 # (The following paragraph is not intended to limit the rights granted
33 # to you to modify and distribute this software under the terms of
34 # the GNU General Public License and is only of importance to you if
35 # you choose to contribute your changes and enhancements to the
36 # community by submitting them to Best Practical Solutions, LLC.)
38 # By intentionally submitting any modifications, corrections or
39 # derivatives to this work, or any other work intended for use with
40 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
41 # you are the copyright holder for those contributions and you grant
42 # Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
43 # royalty-free, perpetual, license to use, copy, create derivative
44 # works based on those contributions, and sublicense and distribute
45 # those contributions and any derivatives thereof.
47 # END BPS TAGGED BLOCK }}}
49 package RT::EmailParser;
52 use base qw/RT::Base/;
62 use File::Temp qw/tempdir/;
66 RT::EmailParser - helper functions for parsing parts from incoming
81 Returns a new RT::EmailParser object
87 my $class = ref($proto) || $proto;
89 bless ($self, $class);
94 =head2 SmartParseMIMEEntityFromScalar Message => SCALAR_REF [, Decode => BOOL, Exact => BOOL ] }
96 Parse a message stored in a scalar from scalar_ref.
100 sub SmartParseMIMEEntityFromScalar {
102 my %args = ( Message => undef, Decode => 1, Exact => 0, @_ );
105 my ( $fh, $temp_file );
108 # on NFS and NTFS, it is possible that tempfile() conflicts
109 # with other processes, causing a race condition. we try to
110 # accommodate this by pausing and retrying.
112 if ( $fh, $temp_file ) =
113 eval { File::Temp::tempfile( UNLINK => 0 ) };
121 print $fh $args{'Message'};
123 if ( -f $temp_file ) {
125 my $entity = $self->ParseMIMEEntityFromFile( $temp_file, $args{'Decode'}, $args{'Exact'} );
127 or RT->Logger->error("Unable to delete temp file $temp_file, error: $!");
133 #If for some reason we weren't able to parse the message using a temp file
134 # try it with a scalar
135 if ( $@ || !$self->Entity ) {
136 return $self->ParseMIMEEntityFromScalar( $args{'Message'}, $args{'Decode'}, $args{'Exact'} );
142 =head2 ParseMIMEEntityFromSTDIN
144 Parse a message from standard input
148 sub ParseMIMEEntityFromSTDIN {
150 return $self->ParseMIMEEntityFromFileHandle(\*STDIN, @_);
153 =head2 ParseMIMEEntityFromScalar $message
155 Takes either a scalar or a reference to a scalar which contains a stringified MIME message.
158 Returns true if it wins.
159 Returns false if it loses.
163 sub ParseMIMEEntityFromScalar {
165 return $self->_ParseMIMEEntity( shift, 'parse_data', @_ );
168 =head2 ParseMIMEEntityFromFilehandle *FH
170 Parses a mime entity from a filehandle passed in as an argument
174 sub ParseMIMEEntityFromFileHandle {
176 return $self->_ParseMIMEEntity( shift, 'parse', @_ );
179 =head2 ParseMIMEEntityFromFile
181 Parses a mime entity from a filename passed in as an argument
185 sub ParseMIMEEntityFromFile {
187 return $self->_ParseMIMEEntity( shift, 'parse_open', @_ );
191 sub _ParseMIMEEntity {
195 my $postprocess = (@_ ? shift : 1);
198 # Create a new parser object:
199 my $parser = MIME::Parser->new();
200 $self->_SetupMIMEParser($parser);
201 $parser->decode_bodies(0) if $exact;
203 # TODO: XXX 3.0 we really need to wrap this in an eval { }
204 unless ( $self->{'entity'} = $parser->$method($message) ) {
205 $RT::Logger->crit("Couldn't parse MIME stream and extract the submessages");
206 # Try again, this time without extracting nested messages
207 $parser->extract_nested_messages(0);
208 unless ( $self->{'entity'} = $parser->$method($message) ) {
209 $RT::Logger->crit("couldn't parse MIME stream");
214 $self->_PostProcessNewEntity if $postprocess;
216 return $self->{'entity'};
221 return unless $self->{'entity'};
223 my @parts = $self->{'entity'}->parts_DFS;
224 $self->_DecodeBody($_) foreach @parts;
231 my $old = $entity->bodyhandle or return;
232 return unless $old->is_encoded;
234 require MIME::Decoder;
235 my $encoding = $entity->head->mime_encoding;
236 my $decoder = MIME::Decoder->new($encoding);
237 unless ( $decoder ) {
238 $RT::Logger->error("Couldn't find decoder for '$encoding', switching to binary");
244 # XXX: use InCore for now, but later must switch to files
245 my $new = MIME::Body::InCore->new();
249 my $source = $old->open('r') or die "couldn't open body: $!";
250 my $destination = $new->open('w') or die "couldn't open body: $!";
253 eval { $decoder->decode($source, $destination) };
254 $RT::Logger->error($@) if $@;
256 $source->close or die "can't close: $!";
257 $destination->close or die "can't close: $!";
259 $entity->bodyhandle( $new );
262 =head2 _PostProcessNewEntity
264 cleans up and postprocesses a newly parsed MIME Entity
268 sub _PostProcessNewEntity {
271 #Now we've got a parsed mime object.
273 # Unfold headers that are have embedded newlines
274 # Better do this before conversion or it will break
275 # with multiline encoded Subject (RFC2047) (fsck.com #5594)
278 # try to convert text parts into utf-8 charset
279 RT::I18N::SetMIMEEntityToEncoding($self->{'entity'}, 'utf-8');
282 =head2 ParseCcAddressesFromHead HASHREF
284 Takes a hashref object containing QueueObj, Head and CurrentUser objects.
285 Returns a list of all email addresses in the To and Cc
286 headers b<except> the current Queue's email addresses, the CurrentUser's
287 email address and anything that the RT->Config->Get('RTAddressRegexp') matches.
291 sub ParseCcAddressesFromHead {
295 CurrentUser => undef,
301 my @ToObjs = Email::Address->parse( Encode::decode( "UTF-8", $self->Head->get('To') ) );
302 my @CcObjs = Email::Address->parse( Encode::decode( "UTF-8", $self->Head->get('Cc') ) );
304 foreach my $AddrObj ( @ToObjs, @CcObjs ) {
305 my $Address = $AddrObj->address;
306 my $user = RT::User->new(RT->SystemUser);
307 $Address = $user->CanonicalizeEmailAddress($Address);
308 next if lc $args{'CurrentUser'}->EmailAddress eq lc $Address;
309 next if $self->IsRTAddress($Address);
311 push ( @Addresses, $Address );
317 =head2 IsRTaddress ADDRESS
319 Takes a single parameter, an email address.
320 Returns true if that address matches the C<RTAddressRegexp> config option.
321 Returns false, otherwise.
330 return undef unless defined($address) and $address =~ /\S/;
332 if ( my $address_re = RT->Config->Get('RTAddressRegexp') ) {
333 return $address =~ /$address_re/i ? 1 : undef;
336 # we don't warn here, but do in config check
337 if ( my $correspond_address = RT->Config->Get('CorrespondAddress') ) {
338 return 1 if lc $correspond_address eq lc $address;
340 if ( my $comment_address = RT->Config->Get('CommentAddress') ) {
341 return 1 if lc $comment_address eq lc $address;
344 my $queue = RT::Queue->new( RT->SystemUser );
345 $queue->LoadByCols( CorrespondAddress => $address );
346 return 1 if $queue->id;
348 $queue->LoadByCols( CommentAddress => $address );
349 return 1 if $queue->id;
355 =head2 CullRTAddresses ARRAY
357 Takes a single argument, an array of email addresses.
358 Returns the same array with any IsRTAddress()es weeded out.
363 sub CullRTAddresses {
365 my @addresses = (@_);
367 return grep { !$self->IsRTAddress($_) } @addresses;
374 # LookupExternalUserInfo is a site-definable method for synchronizing
375 # incoming users with an external data source.
377 # This routine takes a tuple of EmailAddress and FriendlyName
378 # EmailAddress is the user's email address, ususally taken from
379 # an email message's From: header.
380 # FriendlyName is a freeform string, ususally taken from the "comment"
381 # portion of an email message's From: header.
383 # If you define an AutoRejectRequest template, RT will use this
384 # template for the rejection message.
387 =head2 LookupExternalUserInfo
389 LookupExternalUserInfo is a site-definable method for synchronizing
390 incoming users with an external data source.
392 This routine takes a tuple of EmailAddress and FriendlyName
393 EmailAddress is the user's email address, ususally taken from
394 an email message's From: header.
395 FriendlyName is a freeform string, ususally taken from the "comment"
396 portion of an email message's From: header.
398 It returns (FoundInExternalDatabase, ParamHash);
400 FoundInExternalDatabase must be set to 1 before return if the user
401 was found in the external database.
403 ParamHash is a Perl parameter hash which can contain at least the
404 following fields. These fields are used to populate RT's users
405 database when the user is created.
407 EmailAddress is the email address that RT should use for this user.
408 Name is the 'Name' attribute RT should use for this user.
409 'Name' is used for things like access control and user lookups.
410 RealName is what RT should display as the user's name when displaying
415 sub LookupExternalUserInfo {
417 my $EmailAddress = shift;
418 my $RealName = shift;
420 my $FoundInExternalDatabase = 1;
423 #Name is the RT username you want to use for this user.
424 $params{'Name'} = $EmailAddress;
425 $params{'EmailAddress'} = $EmailAddress;
426 $params{'RealName'} = $RealName;
428 return ($FoundInExternalDatabase, %params);
433 Return the parsed head from this message
439 return $self->Entity->head;
444 Return the parsed Entity from this message
450 return $self->{'entity'};
455 =head2 _SetupMIMEParser $parser
457 A private instance method which sets up a mime parser to do its job
462 ## TODO: Does it make sense storing to disk at all? After all, we
463 ## need to put each msg as an in-core scalar before saving it to
464 ## the database, don't we?
466 ## At the same time, we should make sure that we nuke attachments
467 ## Over max size and return them
469 sub _SetupMIMEParser {
473 # Set up output directory for files; we use $RT::VarPath instead
474 # of File::Spec->tmpdir (e.g., /tmp) beacuse it isn't always
477 if ( -w $RT::VarPath ) {
478 $tmpdir = File::Temp::tempdir( DIR => $RT::VarPath, CLEANUP => 1 );
479 } elsif (-w File::Spec->tmpdir) {
480 $tmpdir = File::Temp::tempdir( TMPDIR => 1, CLEANUP => 1 );
482 $RT::Logger->crit("Neither the RT var directory ($RT::VarPath) nor the system tmpdir (@{[File::Spec->tmpdir]}) are writable; falling back to in-memory parsing!");
485 #If someone includes a message, extract it
486 $parser->extract_nested_messages(1);
487 $parser->extract_uuencode(1); ### default is false
490 # If we got a writable tmpdir, write to disk
491 push ( @{ $self->{'AttachmentDirs'} ||= [] }, $tmpdir );
492 $parser->output_dir($tmpdir);
493 $parser->filer->ignore_filename(1);
495 # Set up the prefix for files with auto-generated names:
496 $parser->output_prefix("part");
498 # From the MIME::Parser docs:
499 # "Normally, tmpfiles are created when needed during parsing, and destroyed automatically when they go out of scope"
500 # Turns out that the default is to recycle tempfiles
501 # Temp files should never be recycled, especially when running under perl taint checking
503 $parser->tmp_recycling(0) if $parser->can('tmp_recycling');
505 # Otherwise, fall back to storing it in memory
506 $parser->output_to_core(1);
507 $parser->tmp_to_core(1);
508 $parser->use_inner_files(1);
513 =head2 ParseEmailAddress string
515 Returns a list of Email::Address objects
516 Works around the bug that Email::Address 1.889 and earlier
517 doesn't handle local-only email addresses (when users pass
518 in just usernames on the RT system in fields that expect
521 We don't handle the case of
522 bob, fred@bestpractical.com
523 because we don't want to fail parsing
524 bob, "Falcone, Fred" <fred@bestpractical.com>
525 The next release of Email::Address will have a new method
526 we can use that removes the bandaid
530 use Email::Address::List;
532 sub ParseEmailAddress {
534 my $address_string = shift;
536 my @list = Email::Address::List->parse(
541 my $logger = sub { RT->Logger->error(
542 "Unable to parse an email address from $address_string: ". shift
546 foreach my $e ( @list ) {
547 if ($e->{'type'} eq 'mailbox') {
548 if ($e->{'not_ascii'}) {
549 $logger->($e->{'value'} ." contains not ASCII values");
552 push @addresses, $e->{'value'}
553 } elsif ( $e->{'value'} =~ /^\s*(\w+)\s*$/ ) {
554 my $user = RT::User->new( RT->SystemUser );
557 push @addresses, Email::Address->new($user->Name, $user->EmailAddress);
559 $logger->($e->{'value'} ." is not a valid email address and is not user name");
562 $logger->($e->{'value'} ." is not a valid email address");
566 $self->CleanupAddresses(@addresses);
571 =head2 CleanupAddresses ARRAY
573 Massages an array of L<Email::Address> objects to make their email addresses
576 Currently this strips off surrounding single quotes around C<< ->address >> and
577 B<< modifies the L<Email::Address> objects in-place >>.
579 Returns the list of objects for convienence in C<map>/C<grep> chains.
583 sub CleanupAddresses {
587 next unless defined $addr;
588 # Outlook sometimes sends addresses surrounded by single quotes;
590 if ((my $email = $addr->address) =~ s/^'(.+)'$/$1/) {
591 $addr->address($email);
599 Outlook 2007/2010 have a bug when you write an email with the html format.
600 it will send a 'multipart/alternative' with both 'text/plain' and 'text/html'
601 in it. it's cool to have a 'text/plain' part, but the problem is the part is
602 not so right: all the "\n" in your main message will become "\n\n" :/
604 this method will fix this bug, i.e. replaces "\n\n" to "\n".
605 return 1 if it does find the problem in the entity and get it fixed.
612 my $mime = $self->Entity();
613 return unless $mime && $self->LooksLikeMSEmail($mime);
616 if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
617 my $first = $mime->parts(0);
618 if ( $first && $first->head->get('Content-Type') =~ m{multipart/alternative} )
620 my $inner_first = $first->parts(0);
621 if ( $inner_first && $inner_first->head->get('Content-Type') =~ m{text/plain} )
623 $text_part = $inner_first;
627 elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
628 my $first = $mime->parts(0);
629 if ( $first && $first->head->get('Content-Type') =~ m{text/plain} ) {
634 # Add base64 since we've seen examples of double newlines with
635 # this type too. Need an example of a multi-part base64 to
636 # handle that permutation if it exists.
637 elsif ( ($mime->head->get('Content-Transfer-Encoding')||'') =~ m{base64} ) {
638 $text_part = $mime; # Assuming single part, already decoded.
643 # use the unencoded string
644 my $content = $text_part->bodyhandle->as_string;
645 if ( $content =~ s/\n\n/\n/g ) {
647 # Outlook puts a space on extra newlines, remove it
648 $content =~ s/\ +$//mg;
650 # only write only if we did change the content
651 if ( my $io = $text_part->open("w") ) {
652 $io->print($content);
655 "Removed extra newlines from MS Outlook message.");
659 $RT::Logger->error("Can't write to body to fix newlines");
667 =head1 LooksLikeMSEmail
669 Try to determine if the current email may have
670 come from MS Outlook or gone through Exchange, and therefore
671 may have extra newlines added.
675 sub LooksLikeMSEmail {
679 my $mailer = $mime->head->get('X-Mailer');
681 # 12.0 is outlook 2007, 14.0 is 2010
682 return 1 if ( $mailer && $mailer =~ /Microsoft(?:.*?)Outlook 1[2-4]\./ );
684 if ( RT->Config->Get('CheckMoreMSMailHeaders') ) {
686 # Check for additional headers that might
687 # indicate this came from Outlook or through Exchange.
688 # A sample we received had the headers X-MS-Has-Attach: and
689 # X-MS-Tnef-Correlator: and both had no value.
691 my @tags = $mime->head->tags();
692 return 1 if grep { /^X-MS-/ } @tags;
695 return 0; # Doesn't look like MS email.
700 File::Path::rmtree([@{$self->{'AttachmentDirs'}}],0,1)
701 if $self->{'AttachmentDirs'};
706 RT::Base->_ImportOverlays();