rt/lib/RT/EmailParser.pm

   1 # BEGIN BPS TAGGED BLOCK {{{
   2 #
   3 # COPYRIGHT:
   4 #
   5 # This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC
   6 #                                          <sales@bestpractical.com>
   7 #
   8 # (Except where explicitly superseded by other copyright notices)
   9 #
  10 #
  11 # LICENSE:
  12 #
  13 # This work is made available to you under the terms of Version 2 of
  14 # the GNU General Public License. A copy of that license should have
  15 # been provided with this software, but in any event can be snarfed
  16 # from www.gnu.org.
  17 #
  18 # This work is distributed in the hope that it will be useful, but
  19 # WITHOUT ANY WARRANTY; without even the implied warranty of
  20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 # General Public License for more details.
  22 #
  23 # You should have received a copy of the GNU General Public License
  24 # along with this program; if not, write to the Free Software
  25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  26 # 02110-1301 or visit their web page on the internet at
  27 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
  28 #
  29 #
  30 # CONTRIBUTION SUBMISSION POLICY:
  31 #
  32 # (The following paragraph is not intended to limit the rights granted
  33 # to you to modify and distribute this software under the terms of
  34 # the GNU General Public License and is only of importance to you if
  35 # you choose to contribute your changes and enhancements to the
  36 # community by submitting them to Best Practical Solutions, LLC.)
  37 #
  38 # By intentionally submitting any modifications, corrections or
  39 # derivatives to this work, or any other work intended for use with
  40 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
  41 # you are the copyright holder for those contributions and you grant
  42 # Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
  43 # royalty-free, perpetual, license to use, copy, create derivative
  44 # works based on those contributions, and sublicense and distribute
  45 # those contributions and any derivatives thereof.
  46 #
  47 # END BPS TAGGED BLOCK }}}
  48
  49 package RT::EmailParser;
  50
  51
  52 use base qw/RT::Base/;
  53
  54 use strict;
  55 use warnings;
  56
  57
  58 use Email::Address;
  59 use MIME::Entity;
  60 use MIME::Head;
  61 use MIME::Parser;
  62 use File::Temp qw/tempdir/;
  63
  64 =head1 NAME
  65
  66   RT::EmailParser - helper functions for parsing parts from incoming
  67   email messages
  68
  69 =head1 SYNOPSIS
  70
  71
  72 =head1 DESCRIPTION
  73
  74
  75
  76
  77 =head1 METHODS
  78
  79 =head2 new
  80
  81 Returns a new RT::EmailParser object
  82
  83 =cut
  84
  85 sub new  {
  86   my $proto = shift;
  87   my $class = ref($proto) || $proto;
  88   my $self  = {};
  89   bless ($self, $class);
  90   return $self;
  91 }
  92
  93
  94 =head2 SmartParseMIMEEntityFromScalar Message => SCALAR_REF [, Decode => BOOL, Exact => BOOL ]
  95
  96 Parse a message stored in a scalar from scalar_ref.
  97
  98 =cut
  99
 100 sub SmartParseMIMEEntityFromScalar {
 101     my $self = shift;
 102     my %args = ( Message => undef, Decode => 1, Exact => 0, @_ );
 103
 104     eval {
 105         my ( $fh, $temp_file );
 106         for ( 1 .. 10 ) {
 107
 108             # on NFS and NTFS, it is possible that tempfile() conflicts
 109             # with other processes, causing a race condition. we try to
 110             # accommodate this by pausing and retrying.
 111             last
 112               if ( $fh, $temp_file ) =
 113               eval { File::Temp::tempfile( UNLINK => 0 ) };
 114             sleep 1;
 115         }
 116         if ($fh) {
 117
 118             #thank you, windows
 119             binmode $fh;
 120             $fh->autoflush(1);
 121             print $fh $args{'Message'};
 122             close($fh);
 123             if ( -f $temp_file ) {
 124
 125                 # We have to trust the temp file's name -- untaint it
 126                 $temp_file =~ /(.*)/;
 127                 my $entity = $self->ParseMIMEEntityFromFile( $1, $args{'Decode'}, $args{'Exact'} );
 128                 unlink($1);
 129                 return $entity;
 130             }
 131         }
 132     };
 133
 134     #If for some reason we weren't able to parse the message using a temp file
 135     # try it with a scalar
 136     if ( $@ || !$self->Entity ) {
 137         return $self->ParseMIMEEntityFromScalar( $args{'Message'}, $args{'Decode'}, $args{'Exact'} );
 138     }
 139
 140 }
 141
 142
 143 =head2 ParseMIMEEntityFromSTDIN
 144
 145 Parse a message from standard input
 146
 147 =cut
 148
 149 sub ParseMIMEEntityFromSTDIN {
 150     my $self = shift;
 151     return $self->ParseMIMEEntityFromFileHandle(\*STDIN, @_);
 152 }
 153
 154 =head2 ParseMIMEEntityFromScalar  $message
 155
 156 Takes either a scalar or a reference to a scalar which contains a stringified MIME message.
 157 Parses it.
 158
 159 Returns true if it wins.
 160 Returns false if it loses.
 161
 162 =cut
 163
 164 sub ParseMIMEEntityFromScalar {
 165     my $self = shift;
 166     return $self->_ParseMIMEEntity( shift, 'parse_data', @_ );
 167 }
 168
 169 =head2 ParseMIMEEntityFromFileHandle *FH
 170
 171 Parses a mime entity from a filehandle passed in as an argument
 172
 173 =cut
 174
 175 sub ParseMIMEEntityFromFileHandle {
 176     my $self = shift;
 177     return $self->_ParseMIMEEntity( shift, 'parse', @_ );
 178 }
 179
 180 =head2 ParseMIMEEntityFromFile
 181
 182 Parses a mime entity from a filename passed in as an argument
 183
 184 =cut
 185
 186 sub ParseMIMEEntityFromFile {
 187     my $self = shift;
 188     return $self->_ParseMIMEEntity( shift, 'parse_open', @_ );
 189 }
 190
 191
 192 sub _ParseMIMEEntity {
 193     my $self = shift;
 194     my $message = shift;
 195     my $method = shift;
 196     my $postprocess = (@_ ? shift : 1);
 197     my $exact = shift;
 198
 199     # Create a new parser object:
 200     my $parser = MIME::Parser->new();
 201     $self->_SetupMIMEParser($parser);
 202     $parser->decode_bodies(0) if $exact;
 203
 204     # TODO: XXX 3.0 we really need to wrap this in an eval { }
 205     unless ( $self->{'entity'} = $parser->$method($message) ) {
 206         $RT::Logger->crit("Couldn't parse MIME stream and extract the submessages");
 207         # Try again, this time without extracting nested messages
 208         $parser->extract_nested_messages(0);
 209         unless ( $self->{'entity'} = $parser->$method($message) ) {
 210             $RT::Logger->crit("couldn't parse MIME stream");
 211             return ( undef);
 212         }
 213     }
 214
 215     $self->_PostProcessNewEntity if $postprocess;
 216
 217     return $self->{'entity'};
 218 }
 219
 220 sub _DecodeBodies {
 221     my $self = shift;
 222     return unless $self->{'entity'};
 223
 224     my @parts = $self->{'entity'}->parts_DFS;
 225     $self->_DecodeBody($_) foreach @parts;
 226 }
 227
 228 sub _DecodeBody {
 229     my $self = shift;
 230     my $entity = shift;
 231
 232     my $old = $entity->bodyhandle or return;
 233     return unless $old->is_encoded;
 234
 235     require MIME::Decoder;
 236     my $encoding = $entity->head->mime_encoding;
 237     my $decoder = MIME::Decoder->new($encoding);
 238     unless ( $decoder ) {
 239         $RT::Logger->error("Couldn't find decoder for '$encoding', switching to binary");
 240         $old->is_encoded(0);
 241         return;
 242     }
 243
 244     require MIME::Body;
 245     # XXX: use InCore for now, but later must switch to files
 246     my $new = MIME::Body::InCore->new();
 247     $new->binmode(1);
 248     $new->is_encoded(0);
 249
 250     my $source = $old->open('r') or die "couldn't open body: $!";
 251     my $destination = $new->open('w') or die "couldn't open body: $!";
 252     {
 253         local $@;
 254         eval { $decoder->decode($source, $destination) };
 255         $RT::Logger->error($@) if $@;
 256     }
 257     $source->close or die "can't close: $!";
 258     $destination->close or die "can't close: $!";
 259
 260     $entity->bodyhandle( $new );
 261 }
 262
 263 =head2 _PostProcessNewEntity
 264
 265 cleans up and postprocesses a newly parsed MIME Entity
 266
 267 =cut
 268
 269 sub _PostProcessNewEntity {
 270     my $self = shift;
 271
 272     #Now we've got a parsed mime object.
 273
 274     # Unfold headers that are have embedded newlines
 275     #  Better do this before conversion or it will break
 276     #  with multiline encoded Subject (RFC2047) (fsck.com #5594)
 277     $self->Head->unfold;
 278
 279     # try to convert text parts into utf-8 charset
 280     RT::I18N::SetMIMEEntityToEncoding($self->{'entity'}, 'utf-8');
 281 }
 282
 283 =head2 ParseCcAddressesFromHead HASHREF
 284
 285 Takes a hashref object containing QueueObj, Head and CurrentUser objects.
 286 Returns a list of all email addresses in the To and Cc
 287 headers B<except> the current Queue's email addresses, the CurrentUser's
 288 email address and anything that the RT->Config->Get('RTAddressRegexp') matches.
 289
 290 =cut
 291
 292 sub ParseCcAddressesFromHead {
 293     my $self = shift;
 294     my %args = (
 295         QueueObj    => undef,
 296         CurrentUser => undef,
 297         @_
 298     );
 299
 300     my (@Addresses);
 301
 302     my @ToObjs = Email::Address->parse( $self->Head->get('To') );
 303     my @CcObjs = Email::Address->parse( $self->Head->get('Cc') );
 304
 305     foreach my $AddrObj ( @ToObjs, @CcObjs ) {
 306         my $Address = $AddrObj->address;
 307         my $user = RT::User->new(RT->SystemUser);
 308         $Address = $user->CanonicalizeEmailAddress($Address);
 309         next if lc $args{'CurrentUser'}->EmailAddress eq lc $Address;
 310         next if $self->IsRTAddress($Address);
 311
 312         push ( @Addresses, $Address );
 313     }
 314     return (@Addresses);
 315 }
 316
 317
 318 =head2 IsRTAddress ADDRESS
 319
 320 Takes a single parameter, an email address.
 321 Returns true if that address matches the C<RTAddressRegexp> config option.
 322 Returns false, otherwise.
 323
 324
 325 =cut
 326
 327 sub IsRTAddress {
 328     my $self = shift;
 329     my $address = shift;
 330
 331     if ( my $address_re = RT->Config->Get('RTAddressRegexp') ) {
 332         return $address =~ /$address_re/i ? 1 : undef;
 333     }
 334
 335     # we don't warn here, but do in config check
 336     if ( my $correspond_address = RT->Config->Get('CorrespondAddress') ) {
 337         return 1 if lc $correspond_address eq lc $address;
 338     }
 339     if ( my $comment_address = RT->Config->Get('CommentAddress') ) {
 340         return 1 if lc $comment_address eq lc $address;
 341     }
 342
 343     my $queue = RT::Queue->new( RT->SystemUser );
 344     $queue->LoadByCols( CorrespondAddress => $address );
 345     return 1 if $queue->id;
 346
 347     $queue->LoadByCols( CommentAddress => $address );
 348     return 1 if $queue->id;
 349
 350     return undef;
 351 }
 352
 353
 354 =head2 CullRTAddresses ARRAY
 355
 356 Takes a single argument, an array of email addresses.
 357 Returns the same array with any IsRTAddress()es weeded out.
 358
 359
 360 =cut
 361
 362 sub CullRTAddresses {
 363     my $self = shift;
 364     my @addresses = (@_);
 365
 366     return grep { !$self->IsRTAddress($_) } @addresses;
 367 }
 368
 369
 370
 371
 372
 373 # LookupExternalUserInfo is a site-definable method for synchronizing
 374 # incoming users with an external data source.
 375 #
 376 # This routine takes a tuple of EmailAddress and FriendlyName
 377 #   EmailAddress is the user's email address, usually taken from
 378 #       an email message's From: header.
 379 #   FriendlyName is a freeform string, usually taken from the "comment"
 380 #       portion of an email message's From: header.
 381 #
 382 # If you define an AutoRejectRequest template, RT will use this
 383 # template for the rejection message.
 384
 385
 386 =head2 LookupExternalUserInfo
 387
 388  LookupExternalUserInfo is a site-definable method for synchronizing
 389  incoming users with an external data source.
 390
 391  This routine takes a tuple of EmailAddress and FriendlyName
 392     EmailAddress is the user's email address, usually taken from
 393         an email message's From: header.
 394     FriendlyName is a freeform string, usually taken from the "comment"
 395         portion of an email message's From: header.
 396
 397  It returns (FoundInExternalDatabase, ParamHash);
 398
 399    FoundInExternalDatabase must  be set to 1 before return if the user
 400    was found in the external database.
 401
 402    ParamHash is a Perl parameter hash which can contain at least the
 403    following fields. These fields are used to populate RT's users
 404    database when the user is created.
 405
 406     EmailAddress is the email address that RT should use for this user.
 407     Name is the 'Name' attribute RT should use for this user.
 408          'Name' is used for things like access control and user lookups.
 409     RealName is what RT should display as the user's name when displaying
 410          'friendly' names
 411
 412 =cut
 413
 414 sub LookupExternalUserInfo {
 415   my $self = shift;
 416   my $EmailAddress = shift;
 417   my $RealName = shift;
 418
 419   my $FoundInExternalDatabase = 1;
 420   my %params;
 421
 422   #Name is the RT username you want to use for this user.
 423   $params{'Name'} = $EmailAddress;
 424   $params{'EmailAddress'} = $EmailAddress;
 425   $params{'RealName'} = $RealName;
 426
 427   return ($FoundInExternalDatabase, %params);
 428 }
 429
 430 =head2 Head
 431
 432 Return the parsed head from this message
 433
 434 =cut
 435
 436 sub Head {
 437     my $self = shift;
 438     return $self->Entity->head;
 439 }
 440
 441 =head2 Entity
 442
 443 Return the parsed Entity from this message
 444
 445 =cut
 446
 447 sub Entity {
 448     my $self = shift;
 449     return $self->{'entity'};
 450 }
 451
 452
 453
 454 =head2 _SetupMIMEParser $parser
 455
 456 A private instance method which sets up a mime parser to do its job
 457
 458 =cut
 459
 460
 461     ## TODO: Does it make sense storing to disk at all?  After all, we
 462     ## need to put each msg as an in-core scalar before saving it to
 463     ## the database, don't we?
 464
 465     ## At the same time, we should make sure that we nuke attachments
 466     ## Over max size and return them
 467
 468 sub _SetupMIMEParser {
 469     my $self   = shift;
 470     my $parser = shift;
 471
 472     # Set up output directory for files; we use $RT::VarPath instead
 473     # of File::Spec->tmpdir (e.g., /tmp) beacuse it isn't always
 474     # writable.
 475     my $tmpdir;
 476     if ( -w $RT::VarPath ) {
 477         $tmpdir = File::Temp::tempdir( DIR => $RT::VarPath, CLEANUP => 1 );
 478     } elsif (-w File::Spec->tmpdir) {
 479         $tmpdir = File::Temp::tempdir( TMPDIR => 1, CLEANUP => 1 );
 480     } else {
 481         $RT::Logger->crit("Neither the RT var directory ($RT::VarPath) nor the system tmpdir (@{[File::Spec->tmpdir]}) are writable; falling back to in-memory parsing!");
 482     }
 483
 484     #If someone includes a message, extract it
 485     $parser->extract_nested_messages(1);
 486     $parser->extract_uuencode(1);    ### default is false
 487
 488     if ($tmpdir) {
 489         # If we got a writable tmpdir, write to disk
 490         push ( @{ $self->{'AttachmentDirs'} ||= [] }, $tmpdir );
 491         $parser->output_dir($tmpdir);
 492         $parser->filer->ignore_filename(1);
 493
 494         # Set up the prefix for files with auto-generated names:
 495         $parser->output_prefix("part");
 496
 497         # From the MIME::Parser docs:
 498         # "Normally, tmpfiles are created when needed during parsing, and destroyed automatically when they go out of scope"
 499         # Turns out that the default is to recycle tempfiles
 500         # Temp files should never be recycled, especially when running under perl taint checking
 501
 502         $parser->tmp_recycling(0) if $parser->can('tmp_recycling');
 503     } else {
 504         # Otherwise, fall back to storing it in memory
 505         $parser->output_to_core(1);
 506         $parser->tmp_to_core(1);
 507         $parser->use_inner_files(1);
 508     }
 509
 510 }
 511
 512 =head2 ParseEmailAddress string
 513
 514 Returns a list of Email::Address objects
 515 Works around the bug that Email::Address 1.889 and earlier
 516 doesn't handle local-only email addresses (when users pass
 517 in just usernames on the RT system in fields that expect
 518 Email Addresses)
 519
 520 We don't handle the case of
 521 bob, fred@bestpractical.com
 522 because we don't want to fail parsing
 523 bob, "Falcone, Fred" <fred@bestpractical.com>
 524 The next release of Email::Address will have a new method
 525 we can use that removes the bandaid
 526
 527 =cut
 528
 529 sub ParseEmailAddress {
 530     my $self = shift;
 531     my $address_string = shift;
 532
 533     $address_string =~ s/^\s+|\s+$//g;
 534
 535     my @addresses;
 536     # if it looks like a username / local only email
 537     if ($address_string !~ /@/ && $address_string =~ /^\w+$/) {
 538         my $user = RT::User->new( RT->SystemUser );
 539         my ($id, $msg) = $user->Load($address_string);
 540         if ($id) {
 541             push @addresses, Email::Address->new($user->Name,$user->EmailAddress);
 542         } else {
 543             $RT::Logger->error("Unable to parse an email address from $address_string: $msg");
 544         }
 545     } else {
 546         @addresses = Email::Address->parse($address_string);
 547     }
 548
 549     $self->CleanupAddresses(@addresses);
 550
 551     return @addresses;
 552
 553 }
 554
 555 =head2 CleanupAddresses ARRAY
 556
 557 Massages an array of L<Email::Address> objects to make their email addresses
 558 more palatable.
 559
 560 Currently this strips off surrounding single quotes around C<< ->address >> and
 561 B<< modifies the L<Email::Address> objects in-place >>.
 562
 563 Returns the list of objects for convenience in C<map>/C<grep> chains.
 564
 565 =cut
 566
 567 sub CleanupAddresses {
 568     my $self = shift;
 569
 570     for my $addr (@_) {
 571         next unless defined $addr;
 572         # Outlook sometimes sends addresses surrounded by single quotes;
 573         # clean them all up
 574         if ((my $email = $addr->address) =~ s/^'(.+)'$/$1/) {
 575             $addr->address($email);
 576         }
 577     }
 578     return @_;
 579 }
 580
 581 =head2 RescueOutlook
 582
 583 Outlook 2007/2010 have a bug when you write an email in HTML format:
 584 they send a 'multipart/alternative' with both 'text/plain' and 'text/html'
 585 parts in it.  Having a 'text/plain' part is fine, but its content is
 586 wrong: every "\n" in your main message becomes "\n\n".
 587
 588 This method fixes that, i.e. it replaces "\n\n" with "\n".
 589 It returns 1 if it finds the problem in the entity and fixes it.
 590
 591 =cut
 592
 593
 594 sub RescueOutlook {
 595     my $self = shift;
 596     my $mime = $self->Entity();
 597     return unless $mime && $self->LooksLikeMSEmail($mime);
 598
 599     my $text_part;
 600     if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
 601         my $first = $mime->parts(0);
 602         if ( $first && $first->head->get('Content-Type') =~ m{multipart/alternative} )
 603         {
 604             my $inner_first = $first->parts(0);
 605             if ( $inner_first && $inner_first->head->get('Content-Type') =~ m{text/plain} )
 606             {
 607                 $text_part = $inner_first;
 608             }
 609         }
 610     }
 611     elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
 612         my $first = $mime->parts(0);
 613         if ( $first && $first->head->get('Content-Type') =~ m{text/plain} ) {
 614             $text_part = $first;
 615         }
 616     }
 617
 618     # Add base64 since we've seen examples of double newlines with
 619     # this type too. Need an example of a multi-part base64 to
 620     # handle that permutation if it exists.
 621     elsif ( $mime->head->get('Content-Transfer-Encoding') =~ m{base64} ) {
 622         $text_part = $mime;    # Assuming single part, already decoded.
 623     }
 624
 625     if ($text_part) {
 626
 627         # use the unencoded string
 628         my $content = $text_part->bodyhandle->as_string;
 629         if ( $content =~ s/\n\n/\n/g ) {
 630
 631             # Outlook puts a space on extra newlines, remove it
 632             $content =~ s/\ +$//mg;
 633
 634             # only write only if we did change the content
 635             if ( my $io = $text_part->open("w") ) {
 636                 $io->print($content);
 637                 $io->close;
 638                 $RT::Logger->debug(
 639                     "Removed extra newlines from MS Outlook message.");
 640                 return 1;
 641             }
 642             else {
 643                 $RT::Logger->error("Can't write to body to fix newlines");
 644             }
 645         }
 646     }
 647
 648     return;
 649 }
 650
 651 =head2 LooksLikeMSEmail
 652
 653 Try to determine if the current email may have
 654 come from MS Outlook or gone through Exchange, and therefore
 655 may have extra newlines added.
 656
 657 =cut
 658
 659 sub LooksLikeMSEmail {
 660     my $self = shift;
 661     my $mime = shift;
 662
 663     my $mailer = $mime->head->get('X-Mailer');
 664
 665     # 12.0 is outlook 2007, 14.0 is 2010
 666     return 1 if ( $mailer && $mailer =~ /Microsoft(?:.*?)Outlook 1[2-4]\./ );
 667
 668     if ( RT->Config->Get('CheckMoreMSMailHeaders') ) {
 669
 670         # Check for additional headers that might
 671         # indicate this came from Outlook or through Exchange.
 672         # A sample we received had the headers X-MS-Has-Attach: and
 673         # X-MS-Tnef-Correlator: and both had no value.
 674
 675         my @tags = $mime->head->tags();
 676         return 1 if grep { /^X-MS-/ } @tags;
 677     }
 678
 679     return 0;    # Doesn't look like MS email.
 680 }
 681
 682 sub DESTROY {
 683     my $self = shift;
 684     File::Path::rmtree([@{$self->{'AttachmentDirs'}}],0,1)
 685         if $self->{'AttachmentDirs'};
 686 }
 687
 688
 689
 690 RT::Base->_ImportOverlays();
 691
 692 1;