diff options
Diffstat (limited to 'rt/sbin/rt-fulltext-indexer.in')
-rw-r--r-- | rt/sbin/rt-fulltext-indexer.in | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/rt/sbin/rt-fulltext-indexer.in b/rt/sbin/rt-fulltext-indexer.in index 7e31cac84..9ad6d26bd 100644 --- a/rt/sbin/rt-fulltext-indexer.in +++ b/rt/sbin/rt-fulltext-indexer.in @@ -3,7 +3,7 @@ # # COPYRIGHT: # -# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC +# This software is Copyright (c) 1996-2013 Best Practical Solutions, LLC # <sales@bestpractical.com> # # (Except where explicitly superseded by other copyright notices) @@ -217,6 +217,11 @@ sub attachments { VALUE => 'deleted' ); + # On newer DBIx::SearchBuilder's, indicate that making the query DISTINCT + # is unnecessary because the joins won't produce duplicates. This + # drastically improves performance when fetching attachments. + $res->{joins_are_distinct} = 1; + return goto_specific( suffix => $type, error => "Don't know how to find $type attachments", @@ -369,11 +374,18 @@ sub process_pg { my $status = eval { $dbh->do( $query, undef, $$text, $attachment->id ) }; unless ( $status ) { - if ($dbh->errstr =~ /string is too long for tsvector/) { - warn "Attachment @{[$attachment->id]} not indexed, as it contains too many unique words to be indexed"; + if ( $dbh->err == 7 && $dbh->state eq '54000' ) { + warn "Attachment @{[$attachment->id]} cannot be indexed, as it contains too many unique words"; + } elsif ( $dbh->err == 7 && $dbh->state eq '22021' ) { + warn "Attachment @{[$attachment->id]} cannot be indexed, as it contains invalid UTF8 bytes"; } else { die "error: ". $dbh->errstr; } + + # Insert an empty tsvector, so we count this row as "indexed" + # for purposes of knowing where to pick up + eval { $dbh->do( $query, undef, "", $attachment->id ) } + or die "Failed to insert empty tsvector: " . $dbh->errstr; } } |