rt/lib/RT/EmailParser.pm

   1 # {{{ BEGIN BPS TAGGED BLOCK
   2 #
   3 # COPYRIGHT:
   4 #
   5 # This software is Copyright (c) 1996-2004 Best Practical Solutions, LLC
   6 #                                          <jesse@bestpractical.com>
   7 #
   8 # (Except where explicitly superseded by other copyright notices)
   9 #
  10 #
  11 # LICENSE:
  12 #
  13 # This work is made available to you under the terms of Version 2 of
  14 # the GNU General Public License. A copy of that license should have
  15 # been provided with this software, but in any event can be snarfed
  16 # from www.gnu.org.
  17 #
  18 # This work is distributed in the hope that it will be useful, but
  19 # WITHOUT ANY WARRANTY; without even the implied warranty of
  20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 # General Public License for more details.
  22 #
  23 # You should have received a copy of the GNU General Public License
  24 # along with this program; if not, write to the Free Software
  25 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  26 #
  27 #
  28 # CONTRIBUTION SUBMISSION POLICY:
  29 #
  30 # (The following paragraph is not intended to limit the rights granted
  31 # to you to modify and distribute this software under the terms of
  32 # the GNU General Public License and is only of importance to you if
  33 # you choose to contribute your changes and enhancements to the
  34 # community by submitting them to Best Practical Solutions, LLC.)
  35 #
  36 # By intentionally submitting any modifications, corrections or
  37 # derivatives to this work, or any other work intended for use with
  38 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
  39 # you are the copyright holder for those contributions and you grant
  40 # Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
  41 # royalty-free, perpetual, license to use, copy, create derivative
  42 # works based on those contributions, and sublicense and distribute
  43 # those contributions and any derivatives thereof.
  44 #
  45 # }}} END BPS TAGGED BLOCK
  46 package RT::EmailParser;
  47
  48
  49 use base qw/RT::Base/;
  50
  51 use strict;
  52 use Mail::Address;
  53 use MIME::Entity;
  54 use MIME::Head;
  55 use MIME::Parser;
  56 use File::Temp qw/tempdir/;
  57
  58 =head1 NAME
  59
  60   RT::EmailParser - helper functions for parsing parts from incoming
  61   email messages
  62
  63 =head1 SYNOPSIS
  64
  65
  66 =head1 DESCRIPTION
  67
  68
  69 =begin testing
  70
  71 ok(require RT::EmailParser);
  72
  73 =end testing
  74
  75
  76 =head1 METHODS
  77
  78 =head2 new
  79
  80
  81 =cut
  82
  83 sub new  {
  84   my $proto = shift;
  85   my $class = ref($proto) || $proto;
  86   my $self  = {};
  87   bless ($self, $class);
  88   return $self;
  89 }
  90
  91
  92
  93 # {{{ sub debug
  94
  95 sub debug {
  96     my $val = shift;
  97     my ($debug);
  98     if ($val) {
  99         $RT::Logger->debug( $val . "\n" );
 100         if ($debug) {
 101             print STDERR "$val\n";
 102         }
 103     }
 104     if ($debug) {
 105         return (1);
 106     }
 107 }
 108
 109 # }}}
 110
 111 # {{{ sub CheckForLoops
 112
 113 sub CheckForLoops {
 114     my $self = shift;
 115
 116     my $head = $self->Head;
 117
 118     #If this instance of RT sent it our, we don't want to take it in
 119     my $RTLoop = $head->get("X-RT-Loop-Prevention") || "";
 120     chomp($RTLoop);    #remove that newline
 121     if ( $RTLoop =~ /^\Q$RT::rtname\E/o ) {
 122         return (1);
 123     }
 124
 125     # TODO: We might not trap the case where RT instance A sends a mail
 126     # to RT instance B which sends a mail to ...
 127     return (undef);
 128 }
 129
 130 # }}}
 131
 132 # {{{ sub CheckForSuspiciousSender
 133
 134 sub CheckForSuspiciousSender {
 135     my $self = shift;
 136
 137     #if it's from a postmaster or mailer daemon, it's likely a bounce.
 138
 139     #TODO: better algorithms needed here - there is no standards for
 140     #bounces, so it's very difficult to separate them from anything
 141     #else.  At the other hand, the Return-To address is only ment to be
 142     #used as an error channel, we might want to put up a separate
 143     #Return-To address which is treated differently.
 144
 145     #TODO: search through the whole email and find the right Ticket ID.
 146
 147     my ( $From, $junk ) = $self->ParseSenderAddressFromHead();
 148
 149     if ( ( $From =~ /^mailer-daemon/i ) or ( $From =~ /^postmaster/i ) ) {
 150         return (1);
 151
 152     }
 153
 154     return (undef);
 155
 156 }
 157
 158 # }}}
 159
 160 # {{{ sub CheckForAutoGenerated
 161 sub CheckForAutoGenerated {
 162     my $self = shift;
 163     my $head = $self->Head;
 164
 165     my $Precedence = $head->get("Precedence") || "";
 166     if ( $Precedence =~ /^(bulk|junk)/i ) {
 167         return (1);
 168     }
 169     else {
 170         return (undef);
 171     }
 172 }
 173
 174 # }}}
 175
 176
 177 =head2 SmartParseMIMEEntityFromScalar { Message => SCALAR_REF, Decode => BOOL }
 178
 179 Parse a message stored in a scalar from scalar_ref
 180
 181
 182 =cut
 183
 184 sub SmartParseMIMEEntityFromScalar {
 185     my $self = shift;
 186     my %args = ( Message => undef, Decode => 1, @_ );
 187
 188     my ( $fh, $temp_file );
 189     eval {
 190
 191         for ( 1 .. 10 ) {
 192
 193             # on NFS and NTFS, it is possible that tempfile() conflicts
 194             # with other processes, causing a race condition. we try to
 195             # accommodate this by pausing and retrying.
 196             last
 197               if ( $fh, $temp_file ) =
 198               eval { File::Temp::tempfile( undef, UNLINK => 0 ) };
 199             sleep 1;
 200         }
 201         if ($fh) {
 202
 203             #thank you, windows
 204             binmode $fh;
 205             $fh->autoflush(1);
 206             print $fh $args{'Message'};
 207             close($fh);
 208             if ( -f $temp_file ) {
 209
 210                 # We have to trust the temp file's name -- untaint it
 211                 $temp_file =~ /(.*)/;
 212                 $self->ParseMIMEEntityFromFile( $1, $args{'Decode'} );
 213                 unlink($1);
 214             }
 215         }
 216     };
 217
 218     #If for some reason we weren't able to parse the message using a temp file
 219     # try it with a scalar
 220     if ( $@ || !$self->Entity ) {
 221         $self->ParseMIMEEntityFromScalar( $args{'Message'}, $args{'Decode'} );
 222     }
 223
 224 }
 225
 226 # {{{ sub ParseMIMEEntityFromSTDIN
 227
 228 sub ParseMIMEEntityFromSTDIN {
 229     my $self = shift;
 230     my $postprocess = (@_ ? shift : 1);
 231     return $self->ParseMIMEEntityFromFileHandle(\*STDIN, $postprocess);
 232 }
 233
 234 # }}}
 235
 236 =head2 ParseMIMEEntityFromScalar  $message
 237
 238 Takes either a scalar or a reference to a scalr which contains a stringified MIME message.
 239 Parses it.
 240
 241 Returns true if it wins.
 242 Returns false if it loses.
 243
 244
 245 =cut
 246
 247 sub ParseMIMEEntityFromScalar {
 248     my $self = shift;
 249     my $message = shift;
 250     my $postprocess = (@_ ? shift : 1);
 251     $self->_ParseMIMEEntity($message,'parse_data', $postprocess);
 252 }
 253
 254
 255 # {{{ ParseMIMEEntityFromFilehandle *FH
 256
 257 =head2 ParseMIMEEntityFromFilehandle *FH
 258
 259 Parses a mime entity from a filehandle passed in as an argument
 260
 261 =cut
 262
 263 sub ParseMIMEEntityFromFileHandle {
 264     my $self = shift;
 265     my $filehandle = shift;
 266     my $postprocess = (@_ ? shift : 1);
 267     $self->_ParseMIMEEntity($filehandle,'parse', $postprocess);
 268 }
 269
 270 # }}}
 271
 272 # {{{ ParseMIMEEntityFromFile
 273
 274 =head2 ParseMIMEEntityFromFile
 275
 276 Parses a mime entity from a filename passed in as an argument
 277
 278 =cut
 279
 280 sub ParseMIMEEntityFromFile {
 281     my $self = shift;
 282     my $file = shift;
 283     my $postprocess = (@_ ? shift : 1);
 284     $self->_ParseMIMEEntity($file,'parse_open',$postprocess);
 285 }
 286
 287 # }}}
 288 #
 289 #  {{{ _ParseMIMEEntity {
 290 sub _ParseMIMEEntity {
 291     my $self = shift;
 292     my $message = shift;
 293     my $method = shift;
 294     my $postprocess = shift;
 295     # Create a new parser object:
 296
 297     my $parser = MIME::Parser->new();
 298     $self->_SetupMIMEParser($parser);
 299
 300
 301     # TODO: XXX 3.0 we really need to wrap this in an eval { }
 302     unless ( $self->{'entity'} = $parser->$method($message) ) {
 303         $RT::Logger->crit("Couldn't parse MIME stream and extract the submessages");
 304         # Try again, this time without extracting nested messages
 305         $parser->extract_nested_messages(0);
 306         unless ( $self->{'entity'} = $parser->$method($message) ) {
 307             $RT::Logger->crit("couldn't parse MIME stream");
 308             return ( undef);
 309         }
 310     }
 311     if ($postprocess) {
 312     $self->_PostProcessNewEntity() ;
 313     }
 314
 315 }
 316
 317
 318 # }}}
 319
 320 # {{{ _PostProcessNewEntity
 321
 322 =head2 _PostProcessNewEntity
 323
 324 cleans up and postprocesses a newly parsed MIME Entity
 325
 326 =cut
 327
 328 sub _PostProcessNewEntity {
 329     my $self = shift;
 330
 331     #Now we've got a parsed mime object.
 332
 333     # try to convert text parts into utf-8 charset
 334     RT::I18N::SetMIMEEntityToEncoding($self->{'entity'}, 'utf-8');
 335
 336
 337     # Unfold headers that are have embedded newlines
 338     $self->Head->unfold;
 339
 340
 341 }
 342
 343 # }}}
 344
 345 # {{{ sub ParseTicketId
 346
 347 sub ParseTicketId {
 348     my $self = shift;
 349
 350     my $Subject = shift;
 351
 352     if ( $Subject =~ s/\[\Q$RT::rtname\E\s+\#(\d+)\s*\]//i ) {
 353         my $id = $1;
 354         $RT::Logger->debug("Found a ticket ID. It's $id");
 355         return ($id);
 356     }
 357     else {
 358         return (undef);
 359     }
 360 }
 361
 362 # }}}
 363
 364
 365
 366 # {{{ ParseCcAddressesFromHead
 367
 368 =head2 ParseCcAddressesFromHead HASHREF
 369
 370 Takes a hashref object containing QueueObj, Head and CurrentUser objects.
 371 Returns a list of all email addresses in the To and Cc
 372 headers b<except> the current Queue\'s email addresses, the CurrentUser\'s
 373 email address  and anything that the $RTAddressRegexp matches.
 374
 375 =cut
 376
 377 sub ParseCcAddressesFromHead {
 378
 379     my $self = shift;
 380
 381     my %args = (
 382         QueueObj    => undef,
 383         CurrentUser => undef,
 384         @_
 385     );
 386
 387     my (@Addresses);
 388
 389     my @ToObjs = Mail::Address->parse( $self->Head->get('To') );
 390     my @CcObjs = Mail::Address->parse( $self->Head->get('Cc') );
 391
 392     foreach my $AddrObj ( @ToObjs, @CcObjs ) {
 393         my $Address = $AddrObj->address;
 394         my $user = RT::User->new($RT::SystemUser);
 395         $Address = $user->CanonicalizeEmailAddress($Address);
 396         next if ( $args{'CurrentUser'}->EmailAddress   =~ /^$Address$/i );
 397         next if ( $args{'QueueObj'}->CorrespondAddress =~ /^$Address$/i );
 398         next if ( $args{'QueueObj'}->CommentAddress    =~ /^$Address$/i );
 399         next if ( IsRTAddress($Address) );
 400
 401         push ( @Addresses, $Address );
 402     }
 403     return (@Addresses);
 404 }
 405
 406 # }}}
 407
 408 # {{{ ParseSenderAdddressFromHead
 409
 410 =head2 ParseSenderAddressFromHead
 411
 412 Takes a MIME::Header object. Returns a tuple: (user@host, friendly name)
 413 of the From (evaluated in order of Reply-To:, From:, Sender)
 414
 415 =cut
 416
 417 sub ParseSenderAddressFromHead {
 418     my $self = shift;
 419
 420     #Figure out who's sending this message.
 421     my $From = $self->Head->get('Reply-To')
 422       || $self->Head->get('From')
 423       || $self->Head->get('Sender');
 424     return ( $self->ParseAddressFromHeader($From) );
 425 }
 426
 427 # }}}
 428
 429 # {{{ ParseErrorsToAdddressFromHead
 430
 431 =head2 ParseErrorsToAddressFromHead
 432
 433 Takes a MIME::Header object. Return a single value : user@host
 434 of the From (evaluated in order of Errors-To:,Reply-To:, From:, Sender)
 435
 436 =cut
 437
 438 sub ParseErrorsToAddressFromHead {
 439     my $self = shift;
 440
 441     #Figure out who's sending this message.
 442
 443     foreach my $header ( 'Errors-To', 'Reply-To', 'From', 'Sender' ) {
 444
 445         # If there's a header of that name
 446         my $headerobj = $self->Head->get($header);
 447         if ($headerobj) {
 448             my ( $addr, $name ) = $self->ParseAddressFromHeader($headerobj);
 449
 450             # If it's got actual useful content...
 451             return ($addr) if ($addr);
 452         }
 453     }
 454 }
 455
 456 # }}}
 457
 458 # {{{ ParseAddressFromHeader
 459
 460 =head2 ParseAddressFromHeader ADDRESS
 461
 462 Takes an address from $self->Head->get('Line') and returns a tuple: user@host, friendly name
 463
 464 =cut
 465
 466 sub ParseAddressFromHeader {
 467     my $self = shift;
 468     my $Addr = shift;
 469
 470     # Perl 5.8.0 breaks when doing regex matches on utf8
 471     Encode::_utf8_off($Addr) if $] == 5.008;
 472     my @Addresses = Mail::Address->parse($Addr);
 473
 474     my $AddrObj = $Addresses[0];
 475
 476     unless ( ref($AddrObj) ) {
 477         return ( undef, undef );
 478     }
 479
 480     my $Name = ( $AddrObj->phrase || $AddrObj->comment || $AddrObj->address );
 481
 482     #Lets take the from and load a user object.
 483     my $Address = $AddrObj->address;
 484
 485     return ( $Address, $Name );
 486 }
 487
 488 # }}}
 489
 490 # {{{ IsRTAddress
 491
 492 =item IsRTaddress ADDRESS
 493
 494 Takes a single parameter, an email address.
 495 Returns true if that address matches the $RTAddressRegexp.
 496 Returns false, otherwise.
 497
 498 =begin testing
 499
 500 is(RT::EmailParser::IsRTAddress("","rt\@example.com"),1, "Regexp matched rt address" );
 501 is(RT::EmailParser::IsRTAddress("","frt\@example.com"),undef, "Regexp didn't match non-rt address" );
 502
 503 =end testing
 504
 505 =cut
 506
 507 sub IsRTAddress {
 508     my $self = shift;
 509     my $address = shift;
 510
 511     # Example: the following rule would tell RT not to Cc
 512     #   "tickets@noc.example.com"
 513     if ( defined($RT::RTAddressRegexp) &&
 514                        $address =~ /$RT::RTAddressRegexp/ ) {
 515         return(1);
 516     } else {
 517         return (undef);
 518     }
 519 }
 520
 521 # }}}
 522
 523
 524 # {{{ CullRTAddresses
 525
 526 =item CullRTAddresses ARRAY
 527
 528 Takes a single argument, an array of email addresses.
 529 Returns the same array with any IsRTAddress()es weeded out.
 530
 531 =begin testing
 532
 533 @before = ("rt\@example.com", "frt\@example.com");
 534 @after = ("frt\@example.com");
 535 ok(eq_array(RT::EmailParser::CullRTAddresses("",@before),@after), "CullRTAddresses only culls RT addresses");
 536
 537 =end testing
 538
 539 =cut
 540
 541 sub CullRTAddresses {
 542     my $self = shift;
 543     my @addresses= (@_);
 544     my @addrlist;
 545
 546     foreach my $addr( @addresses ) {
 547       push (@addrlist, $addr)    unless IsRTAddress("", $addr);
 548     }
 549     return (@addrlist);
 550 }
 551
 552 # }}}
 553
 554
 555 # {{{ LookupExternalUserInfo
 556
 557
 558 # LookupExternalUserInfo is a site-definable method for synchronizing
 559 # incoming users with an external data source.
 560 #
 561 # This routine takes a tuple of EmailAddress and FriendlyName
 562 #   EmailAddress is the user's email address, ususally taken from
 563 #       an email message's From: header.
 564 #   FriendlyName is a freeform string, ususally taken from the "comment"
 565 #       portion of an email message's From: header.
 566 #
 567 # If you define an AutoRejectRequest template, RT will use this
 568 # template for the rejection message.
 569
 570
 571 =item LookupExternalUserInfo
 572
 573  LookupExternalUserInfo is a site-definable method for synchronizing
 574  incoming users with an external data source.
 575
 576  This routine takes a tuple of EmailAddress and FriendlyName
 577     EmailAddress is the user's email address, ususally taken from
 578         an email message's From: header.
 579     FriendlyName is a freeform string, ususally taken from the "comment"
 580         portion of an email message's From: header.
 581
 582  It returns (FoundInExternalDatabase, ParamHash);
 583
 584    FoundInExternalDatabase must  be set to 1 before return if the user was
 585    found in the external database.
 586
 587    ParamHash is a Perl parameter hash which can contain at least the following
 588    fields. These fields are used to populate RT's users database when the user
 589    is created
 590
 591     EmailAddress is the email address that RT should use for this user.
 592     Name is the 'Name' attribute RT should use for this user.
 593          'Name' is used for things like access control and user lookups.
 594     RealName is what RT should display as the user's name when displaying
 595          'friendly' names
 596
 597 =cut
 598
 599 sub LookupExternalUserInfo {
 600   my $self = shift;
 601   my $EmailAddress = shift;
 602   my $RealName = shift;
 603
 604   my $FoundInExternalDatabase = 1;
 605   my %params;
 606
 607   #Name is the RT username you want to use for this user.
 608   $params{'Name'} = $EmailAddress;
 609   $params{'EmailAddress'} = $EmailAddress;
 610   $params{'RealName'} = $RealName;
 611
 612   # See RT's contributed code for examples.
 613   # http://www.fsck.com/pub/rt/contrib/
 614   return ($FoundInExternalDatabase, %params);
 615 }
 616
 617 # }}}
 618
 619 # {{{ Accessor methods for parsed email messages
 620
 621 =head2 Head
 622
 623 Return the parsed head from this message
 624
 625 =cut
 626
 627 sub Head {
 628     my $self = shift;
 629     return $self->Entity->head;
 630 }
 631
 632 =head2 Entity
 633
 634 Return the parsed Entity from this message
 635
 636 =cut
 637
 638 sub Entity {
 639     my $self = shift;
 640     return $self->{'entity'};
 641 }
 642
 643 # }}}
 644 # {{{ _SetupMIMEParser
 645
 646 =head2 _SetupMIMEParser $parser
 647
 648 A private instance method which sets up a mime parser to do its job
 649
 650 =cut
 651
 652
 653     ## TODO: Does it make sense storing to disk at all?  After all, we
 654     ## need to put each msg as an in-core scalar before saving it to
 655     ## the database, don't we?
 656
 657     ## At the same time, we should make sure that we nuke attachments
 658     ## Over max size and return them
 659
 660 sub _SetupMIMEParser {
 661     my $self   = shift;
 662     my $parser = shift;
 663
 664     # Set up output directory for files:
 665
 666     my $tmpdir = File::Temp::tempdir( TMPDIR => 1, CLEANUP => 1 );
 667     push ( @{ $self->{'AttachmentDirs'} }, $tmpdir );
 668     $parser->output_dir($tmpdir);
 669     $parser->filer->ignore_filename(1);
 670
 671     #If someone includes a message, extract it
 672     $parser->extract_nested_messages(1);
 673
 674     $parser->extract_uuencode(1);    ### default is false
 675
 676     # Set up the prefix for files with auto-generated names:
 677     $parser->output_prefix("part");
 678
 679     # do _not_ store each msg as in-core scalar;
 680
 681     $parser->output_to_core(0);
 682
 683     # From the MIME::Parser docs:
 684     # "Normally, tmpfiles are created when needed during parsing, and destroyed automatically when they go out of scope"
 685     # Turns out that the default is to recycle tempfiles
 686     # Temp files should never be recycled, especially when running under perl taint checking
 687
 688     $parser->tmp_recycling(0);
 689
 690
 691 }
 692
 693 # }}}
 694
 695 sub DESTROY {
 696     my $self = shift;
 697     File::Path::rmtree([@{$self->{'AttachmentDirs'}}],0,1);
 698 }
 699
 700
 701
 702 eval "require RT::EmailParser_Vendor";
 703 die $@ if ($@ && $@ !~ qr{^Can't locate RT/EmailParser_Vendor.pm});
 704 eval "require RT::EmailParser_Local";
 705 die $@ if ($@ && $@ !~ qr{^Can't locate RT/EmailParser_Local.pm});
 706
 707 1;