rt/lib/RT/EmailParser.pm

   1 # BEGIN LICENSE BLOCK
   2 #
   3 # Copyright (c) 1996-2003 Jesse Vincent <jesse@bestpractical.com>
   4 #
   5 # (Except where explictly superceded by other copyright notices)
   6 #
   7 # This work is made available to you under the terms of Version 2 of
   8 # the GNU General Public License. A copy of that license should have
   9 # been provided with this software, but in any event can be snarfed
  10 # from www.gnu.org.
  11 #
  12 # This work is distributed in the hope that it will be useful, but
  13 # WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 # General Public License for more details.
  16 #
  17 # Unless otherwise specified, all modifications, corrections or
  18 # extensions to this work which alter its source code become the
  19 # property of Best Practical Solutions, LLC when submitted for
  20 # inclusion in the work.
  21 #
  22 #
  23 # END LICENSE BLOCK
  24 package RT::EmailParser;
  25
  26
  27 use base qw/RT::Base/;
  28
  29 use strict;
  30 use Mail::Address;
  31 use MIME::Entity;
  32 use MIME::Head;
  33 use MIME::Parser;
  34 use File::Temp qw/tempdir/;
  35
  36 =head1 NAME
  37
  38   RT::EmailParser - helper functions for parsing parts from incoming
  39   email messages
  40
  41 =head1 SYNOPSIS
  42
  43
  44 =head1 DESCRIPTION
  45
  46
  47 =begin testing
  48
  49 ok(require RT::EmailParser);
  50
  51 =end testing
  52
  53
  54 =head1 METHODS
  55
  56 =head2 new
  57
  58
  59 =cut
  60
  61 sub new  {
  62   my $proto = shift;
  63   my $class = ref($proto) || $proto;
  64   my $self  = {};
  65   bless ($self, $class);
  66   return $self;
  67 }
  68
  69
  70
  71 # {{{ sub debug
  72
  73 sub debug {
  74     my $val = shift;
  75     my ($debug);
  76     if ($val) {
  77         $RT::Logger->debug( $val . "\n" );
  78         if ($debug) {
  79             print STDERR "$val\n";
  80         }
  81     }
  82     if ($debug) {
  83         return (1);
  84     }
  85 }
  86
  87 # }}}
  88
  89 # {{{ sub CheckForLoops
  90
  91 sub CheckForLoops {
  92     my $self = shift;
  93
  94     my $head = $self->Head;
  95
  96     #If this instance of RT sent it our, we don't want to take it in
  97     my $RTLoop = $head->get("X-RT-Loop-Prevention") || "";
  98     chomp($RTLoop);    #remove that newline
  99     if ( $RTLoop =~ /^\Q$RT::rtname\E/o ) {
 100         return (1);
 101     }
 102
 103     # TODO: We might not trap the case where RT instance A sends a mail
 104     # to RT instance B which sends a mail to ...
 105     return (undef);
 106 }
 107
 108 # }}}
 109
 110 # {{{ sub CheckForSuspiciousSender
 111
 112 sub CheckForSuspiciousSender {
 113     my $self = shift;
 114
 115     #if it's from a postmaster or mailer daemon, it's likely a bounce.
 116
 117     #TODO: better algorithms needed here - there is no standards for
 118     #bounces, so it's very difficult to separate them from anything
 119     #else.  At the other hand, the Return-To address is only ment to be
 120     #used as an error channel, we might want to put up a separate
 121     #Return-To address which is treated differently.
 122
 123     #TODO: search through the whole email and find the right Ticket ID.
 124
 125     my ( $From, $junk ) = $self->ParseSenderAddressFromHead();
 126
 127     if ( ( $From =~ /^mailer-daemon/i ) or ( $From =~ /^postmaster/i ) ) {
 128         return (1);
 129
 130     }
 131
 132     return (undef);
 133
 134 }
 135
 136 # }}}
 137
 138 # {{{ sub CheckForAutoGenerated
 139 sub CheckForAutoGenerated {
 140     my $self = shift;
 141     my $head = $self->Head;
 142
 143     my $Precedence = $head->get("Precedence") || "";
 144     if ( $Precedence =~ /^(bulk|junk)/i ) {
 145         return (1);
 146     }
 147     else {
 148         return (undef);
 149     }
 150 }
 151
 152 # }}}
 153
 154 # {{{ sub ParseMIMEEntityFromSTDIN
 155
 156 sub ParseMIMEEntityFromSTDIN {
 157     my $self = shift;
 158     return $self->ParseMIMEEntityFromFileHandle(\*STDIN);
 159 }
 160
 161 # }}}
 162
 163 =head2 ParseMIMEEntityFromScalar  $message
 164
 165 Takes either a scalar or a reference to a scalr which contains a stringified MIME message.
 166 Parses it.
 167
 168 Returns true if it wins.
 169 Returns false if it loses.
 170
 171
 172 =cut
 173
 174 sub ParseMIMEEntityFromScalar {
 175     my $self = shift;
 176     my $message = shift;
 177
 178     $self->_DoParse('parse_data', $message);
 179
 180 }
 181
 182 # {{{ ParseMIMEEntityFromFilehandle *FH
 183
 184 =head2 ParseMIMEEntityFromFilehandle *FH
 185
 186 Parses a mime entity from a filehandle passed in as an argument
 187
 188 =cut
 189
 190 sub ParseMIMEEntityFromFileHandle {
 191     my $self = shift;
 192     my $filehandle = shift;
 193
 194     $self->_DoParse('parse', $filehandle);
 195
 196 }
 197
 198 # }}}
 199
 200 # {{{ ParseMIMEEntityFromFile
 201
 202 =head2 ParseMIMEEntityFromFile
 203
 204 Parses a mime entity from a filename passed in as an argument
 205
 206 =cut
 207
 208 sub ParseMIMEEntityFromFile {
 209     my $self = shift;
 210
 211     my $file = shift;
 212     $self->_DoParse('parse_open', $file);
 213 }
 214
 215 # }}}
 216
 217 # {{{ _DoParse
 218
 219 =head2 _DoParse PARSEMETHOD CONTENT
 220
 221
 222 A helper for the various parsers to turn around and do the dispatch to the actual parser
 223
 224 =cut
 225
 226 sub _DoParse {
 227     my $self = shift;
 228     my $method = shift;
 229     my $file = shift;
 230
 231     # Create a new parser object:
 232
 233     my $parser = MIME::Parser->new();
 234     $self->_SetupMIMEParser($parser);
 235
 236
 237     # TODO: XXX 3.0 we really need to wrap this in an eval { }
 238
 239     unless ( $self->{'entity'} = $parser->$method($file) ) {
 240
 241         # Try again, this time without extracting nested messages
 242         $parser->extract_nested_messages(0);
 243         unless ( $self->{'entity'} = $parser->$method($file) ) {
 244             $RT::Logger->crit("couldn't parse MIME stream");
 245             return ( undef);
 246         }
 247     }
 248     $self->_PostProcessNewEntity();
 249     return (1);
 250 }
 251
 252 # }}}
 253
 254
 255 # {{{ _PostProcessNewEntity
 256
 257 =head2 _PostProcessNewEntity
 258
 259 cleans up and postprocesses a newly parsed MIME Entity
 260
 261 =cut
 262
 263 sub _PostProcessNewEntity {
 264     my $self = shift;
 265
 266     #Now we've got a parsed mime object.
 267
 268     # try to convert text parts into utf-8 charset
 269     RT::I18N::SetMIMEEntityToEncoding($self->{'entity'}, 'utf-8');
 270
 271
 272     # Unfold headers that are have embedded newlines
 273     $self->Head->unfold;
 274
 275
 276 }
 277
 278 # }}}
 279
 280 # {{{ sub ParseTicketId
 281
 282 sub ParseTicketId {
 283     my $self = shift;
 284
 285     my $Subject = shift;
 286
 287     if ( $Subject =~ s/\[\Q$RT::rtname\E\s+\#(\d+)\s*\]//i ) {
 288         my $id = $1;
 289         $RT::Logger->debug("Found a ticket ID. It's $id");
 290         return ($id);
 291     }
 292     else {
 293         return (undef);
 294     }
 295 }
 296
 297 # }}}
 298
 299
 300
 301 # {{{ ParseCcAddressesFromHead
 302
 303 =head2 ParseCcAddressesFromHead HASHREF
 304
 305 Takes a hashref object containing QueueObj, Head and CurrentUser objects.
 306 Returns a list of all email addresses in the To and Cc
 307 headers b<except> the current Queue\'s email addresses, the CurrentUser\'s
 308 email address  and anything that the $RTAddressRegexp matches.
 309
 310 =cut
 311
 312 sub ParseCcAddressesFromHead {
 313
 314     my $self = shift;
 315
 316     my %args = (
 317         QueueObj    => undef,
 318         CurrentUser => undef,
 319         @_
 320     );
 321
 322     my (@Addresses);
 323
 324     my @ToObjs = Mail::Address->parse( $self->Head->get('To') );
 325     my @CcObjs = Mail::Address->parse( $self->Head->get('Cc') );
 326
 327     foreach my $AddrObj ( @ToObjs, @CcObjs ) {
 328         my $Address = $AddrObj->address;
 329         my $user = RT::User->new($RT::SystemUser);
 330         $Address = $user->CanonicalizeEmailAddress($Address);
 331         next if ( $args{'CurrentUser'}->EmailAddress   =~ /^$Address$/i );
 332         next if ( $args{'QueueObj'}->CorrespondAddress =~ /^$Address$/i );
 333         next if ( $args{'QueueObj'}->CommentAddress    =~ /^$Address$/i );
 334         next if ( IsRTAddress($Address) );
 335
 336         push ( @Addresses, $Address );
 337     }
 338     return (@Addresses);
 339 }
 340
 341 # }}}
 342
 343 # {{{ ParseSenderAdddressFromHead
 344
 345 =head2 ParseSenderAddressFromHead
 346
 347 Takes a MIME::Header object. Returns a tuple: (user@host, friendly name)
 348 of the From (evaluated in order of Reply-To:, From:, Sender)
 349
 350 =cut
 351
 352 sub ParseSenderAddressFromHead {
 353     my $self = shift;
 354
 355     #Figure out who's sending this message.
 356     my $From = $self->Head->get('Reply-To')
 357       || $self->Head->get('From')
 358       || $self->Head->get('Sender');
 359     return ( $self->ParseAddressFromHeader($From) );
 360 }
 361
 362 # }}}
 363
 364 # {{{ ParseErrorsToAdddressFromHead
 365
 366 =head2 ParseErrorsToAddressFromHead
 367
 368 Takes a MIME::Header object. Return a single value : user@host
 369 of the From (evaluated in order of Errors-To:,Reply-To:, From:, Sender)
 370
 371 =cut
 372
 373 sub ParseErrorsToAddressFromHead {
 374     my $self = shift;
 375
 376     #Figure out who's sending this message.
 377
 378     foreach my $header ( 'Errors-To', 'Reply-To', 'From', 'Sender' ) {
 379
 380         # If there's a header of that name
 381         my $headerobj = $self->Head->get($header);
 382         if ($headerobj) {
 383             my ( $addr, $name ) = $self->ParseAddressFromHeader($headerobj);
 384
 385             # If it's got actual useful content...
 386             return ($addr) if ($addr);
 387         }
 388     }
 389 }
 390
 391 # }}}
 392
 393 # {{{ ParseAddressFromHeader
 394
 395 =head2 ParseAddressFromHeader ADDRESS
 396
 397 Takes an address from $self->Head->get('Line') and returns a tuple: user@host, friendly name
 398
 399 =cut
 400
 401 sub ParseAddressFromHeader {
 402     my $self = shift;
 403     my $Addr = shift;
 404
 405     my @Addresses = Mail::Address->parse($Addr);
 406
 407     my $AddrObj = $Addresses[0];
 408
 409     unless ( ref($AddrObj) ) {
 410         return ( undef, undef );
 411     }
 412
 413     my $Name = ( $AddrObj->phrase || $AddrObj->comment || $AddrObj->address );
 414
 415     #Lets take the from and load a user object.
 416     my $Address = $AddrObj->address;
 417
 418     return ( $Address, $Name );
 419 }
 420
 421 # }}}
 422
 423 # {{{ IsRTAddress
 424
 425 =item IsRTaddress ADDRESS
 426
 427 Takes a single parameter, an email address.
 428 Returns true if that address matches the $RTAddressRegexp.
 429 Returns false, otherwise.
 430
 431 =begin testing
 432
 433 is(RT::EmailParser::IsRTAddress("","rt\@example.com"),1, "Regexp matched rt address" );
 434 is(RT::EmailParser::IsRTAddress("","frt\@example.com"),undef, "Regexp didn't match non-rt address" );
 435
 436 =end testing
 437
 438 =cut
 439
 440 sub IsRTAddress {
 441     my $self = shift;
 442     my $address = shift;
 443
 444     # Example: the following rule would tell RT not to Cc
 445     #   "tickets@noc.example.com"
 446     if ( defined($RT::RTAddressRegexp) &&
 447                        $address =~ /$RT::RTAddressRegexp/ ) {
 448         return(1);
 449     } else {
 450         return (undef);
 451     }
 452 }
 453
 454 # }}}
 455
 456
 457 # {{{ CullRTAddresses
 458
 459 =item CullRTAddresses ARRAY
 460
 461 Takes a single argument, an array of email addresses.
 462 Returns the same array with any IsRTAddress()es weeded out.
 463
 464 =begin testing
 465
 466 @before = ("rt\@example.com", "frt\@example.com");
 467 @after = ("frt\@example.com");
 468 ok(eq_array(RT::EmailParser::CullRTAddresses("",@before),@after), "CullRTAddresses only culls RT addresses");
 469
 470 =end testing
 471
 472 =cut
 473
 474 sub CullRTAddresses {
 475     my $self = shift;
 476     my @addresses= (@_);
 477     my @addrlist;
 478
 479     foreach my $addr( @addresses ) {
 480       push (@addrlist, $addr)    unless IsRTAddress("", $addr);
 481     }
 482     return (@addrlist);
 483 }
 484
 485 # }}}
 486
 487
 488 # {{{ LookupExternalUserInfo
 489
 490
 491 # LookupExternalUserInfo is a site-definable method for synchronizing
 492 # incoming users with an external data source.
 493 #
 494 # This routine takes a tuple of EmailAddress and FriendlyName
 495 #   EmailAddress is the user's email address, ususally taken from
 496 #       an email message's From: header.
 497 #   FriendlyName is a freeform string, ususally taken from the "comment"
 498 #       portion of an email message's From: header.
 499 #
 500 # If you define an AutoRejectRequest template, RT will use this
 501 # template for the rejection message.
 502
 503
 504 =item LookupExternalUserInfo
 505
 506  LookupExternalUserInfo is a site-definable method for synchronizing
 507  incoming users with an external data source.
 508
 509  This routine takes a tuple of EmailAddress and FriendlyName
 510     EmailAddress is the user's email address, ususally taken from
 511         an email message's From: header.
 512     FriendlyName is a freeform string, ususally taken from the "comment"
 513         portion of an email message's From: header.
 514
 515  It returns (FoundInExternalDatabase, ParamHash);
 516
 517    FoundInExternalDatabase must  be set to 1 before return if the user was
 518    found in the external database.
 519
 520    ParamHash is a Perl parameter hash which can contain at least the following
 521    fields. These fields are used to populate RT's users database when the user
 522    is created
 523
 524     EmailAddress is the email address that RT should use for this user.
 525     Name is the 'Name' attribute RT should use for this user.
 526          'Name' is used for things like access control and user lookups.
 527     RealName is what RT should display as the user's name when displaying
 528          'friendly' names
 529
 530 =cut
 531
 532 sub LookupExternalUserInfo {
 533   my $self = shift;
 534   my $EmailAddress = shift;
 535   my $RealName = shift;
 536
 537   my $FoundInExternalDatabase = 1;
 538   my %params;
 539
 540   #Name is the RT username you want to use for this user.
 541   $params{'Name'} = $EmailAddress;
 542   $params{'EmailAddress'} = $EmailAddress;
 543   $params{'RealName'} = $RealName;
 544
 545   # See RT's contributed code for examples.
 546   # http://www.fsck.com/pub/rt/contrib/
 547   return ($FoundInExternalDatabase, %params);
 548 }
 549
 550 # }}}
 551
 552 # {{{ Accessor methods for parsed email messages
 553
 554 =head2 Head
 555
 556 Return the parsed head from this message
 557
 558 =cut
 559
 560 sub Head {
 561     my $self = shift;
 562     return $self->Entity->head;
 563 }
 564
 565 =head2 Entity
 566
 567 Return the parsed Entity from this message
 568
 569 =cut
 570
 571 sub Entity {
 572     my $self = shift;
 573     return $self->{'entity'};
 574 }
 575
 576 # }}}
 577 # {{{ _SetupMIMEParser
 578
 579 =head2 _SetupMIMEParser $parser
 580
 581 A private instance method which sets up a mime parser to do its job
 582
 583 =cut
 584
 585
 586     ## TODO: Does it make sense storing to disk at all?  After all, we
 587     ## need to put each msg as an in-core scalar before saving it to
 588     ## the database, don't we?
 589
 590     ## At the same time, we should make sure that we nuke attachments
 591     ## Over max size and return them
 592
 593 sub _SetupMIMEParser {
 594     my $self   = shift;
 595     my $parser = shift;
 596
 597     # Set up output directory for files:
 598
 599     my $tmpdir = File::Temp::tempdir( TMPDIR => 1, CLEANUP => 1 );
 600     push ( @{ $self->{'AttachmentDirs'} }, $tmpdir );
 601     $parser->output_dir($tmpdir);
 602     $parser->filer->ignore_filename(1);
 603
 604     #If someone includes a message, extract it
 605     $parser->extract_nested_messages(1);
 606
 607     $parser->extract_uuencode(1);    ### default is false
 608
 609     # Set up the prefix for files with auto-generated names:
 610     $parser->output_prefix("part");
 611
 612     # do _not_ store each msg as in-core scalar;
 613
 614     $parser->output_to_core(0);
 615 }
 616
 617 # }}}
 618
 619 sub DESTROY {
 620     my $self = shift;
 621     File::Path::rmtree([@{$self->{'AttachmentDirs'}}],0,1);
 622 }
 623
 624
 625
 626 eval "require RT::EmailParser_Vendor";
 627 die $@ if ($@ && $@ !~ qr{^Can't locate RT/EmailParser_Vendor.pm});
 628 eval "require RT::EmailParser_Local";
 629 die $@ if ($@ && $@ !~ qr{^Can't locate RT/EmailParser_Local.pm});
 630
 631 1;