summary refs log tree commit diff
path: root/rt/sbin/rt-fulltext-indexer.in
diff options
context:
space:
mode:
Diffstat (limited to 'rt/sbin/rt-fulltext-indexer.in')
-rw-r--r-- rt/sbin/rt-fulltext-indexer.in 18
1 files changed, 15 insertions, 3 deletions
diff --git a/rt/sbin/rt-fulltext-indexer.in b/rt/sbin/rt-fulltext-indexer.in
index 7e31cac84..9ad6d26bd 100644
--- a/rt/sbin/rt-fulltext-indexer.in
+++ b/rt/sbin/rt-fulltext-indexer.in
@@ -3,7 +3,7 @@
#
# COPYRIGHT:
#
-# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC
+# This software is Copyright (c) 1996-2013 Best Practical Solutions, LLC
# <sales@bestpractical.com>
#
# (Except where explicitly superseded by other copyright notices)
@@ -217,6 +217,11 @@ sub attachments {
VALUE => 'deleted'
);
+ # On newer DBIx::SearchBuilder's, indicate that making the query DISTINCT
+ # is unnecessary because the joins won't produce duplicates. This
+ # drastically improves performance when fetching attachments.
+ $res->{joins_are_distinct} = 1;
+
return goto_specific(
suffix => $type,
error => "Don't know how to find $type attachments",
@@ -369,11 +374,18 @@ sub process_pg {
my $status = eval { $dbh->do( $query, undef, $$text, $attachment->id ) };
unless ( $status ) {
- if ($dbh->errstr =~ /string is too long for tsvector/) {
- warn "Attachment @{[$attachment->id]} not indexed, as it contains too many unique words to be indexed";
+ if ( $dbh->err == 7 && $dbh->state eq '54000' ) {
+ warn "Attachment @{[$attachment->id]} cannot be indexed, as it contains too many unique words";
+ } elsif ( $dbh->err == 7 && $dbh->state eq '22021' ) {
+ warn "Attachment @{[$attachment->id]} cannot be indexed, as it contains invalid UTF8 bytes";
} else {
die "error: ". $dbh->errstr;
}
+
+ # Insert an empty tsvector, so we count this row as "indexed"
+ # for purposes of knowing where to pick up
+ eval { $dbh->do( $query, undef, "", $attachment->id ) }
+ or die "Failed to insert empty tsvector: " . $dbh->errstr;
}
}