2 # BEGIN BPS TAGGED BLOCK {{{
6 # This software is Copyright (c) 1996-2015 Best Practical Solutions, LLC
7 # <sales@bestpractical.com>
9 # (Except where explicitly superseded by other copyright notices)
14 # This work is made available to you under the terms of Version 2 of
15 # the GNU General Public License. A copy of that license should have
16 # been provided with this software, but in any event can be snarfed
19 # This work is distributed in the hope that it will be useful, but
20 # WITHOUT ANY WARRANTY; without even the implied warranty of
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 # General Public License for more details.
24 # You should have received a copy of the GNU General Public License
25 # along with this program; if not, write to the Free Software
26 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 # 02110-1301 or visit their web page on the internet at
28 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
31 # CONTRIBUTION SUBMISSION POLICY:
33 # (The following paragraph is not intended to limit the rights granted
34 # to you to modify and distribute this software under the terms of
35 # the GNU General Public License and is only of importance to you if
36 # you choose to contribute your changes and enhancements to the
37 # community by submitting them to Best Practical Solutions, LLC.)
39 # By intentionally submitting any modifications, corrections or
40 # derivatives to this work, or any other work intended for use with
41 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
42 # you are the copyright holder for those contributions and you grant
43 # Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
44 # royalty-free, perpetual, license to use, copy, create derivative
45 # works based on those contributions, and sublicense and distribute
46 # those contributions and any derivatives thereof.
48 # END BPS TAGGED BLOCK }}}
53 # fix lib paths, some may be relative
54 BEGIN { # BEGIN RT CMD BOILERPLATE
57 my @libs = ("@RT_LIB_PATH@", "@LOCAL_LIB_PATH@");
61 unless ( File::Spec->file_name_is_absolute($lib) ) {
62 $bin_path ||= ( File::Spec->splitpath(Cwd::abs_path(__FILE__)) )[1];
63 $lib = File::Spec->catfile( $bin_path, File::Spec->updir, $lib );
71 use RT::Interface::CLI ();
74 use Getopt::Long qw(GetOptions);
75 my %OPT = ( memory => '2M', limit => 0 );
88 RT::Interface::CLI->ShowHelp if $OPT{help};
91 if ( !flock main::DATA, LOCK_EX | LOCK_NB ) {
93 RT::Logger->info("$0 is already running; aborting silently, as requested");
97 print STDERR "$0 is already running\n";
102 my $db_type = RT->Config->Get('DatabaseType');
103 my $fts_config = $ENV{RT_FTS_CONFIG} ? JSON::from_json($ENV{RT_FTS_CONFIG})
104 : RT->Config->Get('FullTextSearch') || {};
105 unless ( $fts_config->{'Enable'} ) {
108 Full text search is disabled in your RT configuration. Run
109 @RT_SBIN_PATH_R@/rt-setup-fulltext-index to configure and enable it.
114 unless ( $fts_config->{'Indexed'} ) {
117 Full text search is enabled in your RT configuration, but not with any
118 full-text database indexing -- hence this tool is not required. Read
119 the documentation for %FullTextSearch in your RT_Config for more details.
125 if ( $db_type eq 'Oracle' ) {
126 my $index = $fts_config->{'IndexName'} || 'rt_fts_index';
127 $RT::Handle->dbh->do(
128 "begin ctx_ddl.sync_index(?, ?); end;", undef,
129 $index, $OPT{'memory'}
132 } elsif ( $fts_config->{Sphinx} ) {
135 Updates to the external Sphinx index are done via running the sphinx
144 # Skip ACL checks. This saves a large number of unnecessary queries
145 # (for tickets, ACLs, and users) which are unnecessary, as we are
146 # running as the system user.
148 no warnings 'redefine';
150 *RT::Attachment::_Value = \&DBIx::SearchBuilder::Record::_Value;
151 *RT::Attachments::Next = \&DBIx::SearchBuilder::Next;
155 if ($db_type eq 'mysql') {
157 } elsif ($db_type eq 'Pg') {
161 sub attachment_loop {
163 my $table = $fts_config->{'Table'};
166 # Fetch in batches of size --limit
168 # Indexes all text/plain and text/html attachments
169 my $attachments = RT::Attachments->new( RT->SystemUser );
171 FIELD => 'ContentType',
173 VALUE => ['text/plain', 'text/html'],
175 $attachments->Limit( FIELD => 'id', OPERATOR => '>', VALUE => $LAST );
176 $attachments->OrderBy( FIELD => 'id', ORDER => 'asc' );
177 $attachments->RowsPerPage( $OPT{'limit'} );
179 # Call back to the DB-specific part
180 $subref->($attachments);
182 $LAST = $attachments->Last->id if $attachments->Count;
184 redo if $OPT{'all'} and $attachments->Count == $OPT{'limit'};
188 sub process_bulk_insert {
189 my $dbh = $RT::Handle->dbh;
190 my ($statement, $error) = @_;
192 # Doing large inserts is faster than individual statements, but
193 # comes at a parsing cost; cache the statement handles (99% of which
194 # will be the same size) for a notable (2x) speed gain.
198 $dbh->prepare($statement->(1));
200 attachment_loop( sub {
201 my ($attachments) = @_;
204 while ( my $a = $attachments->Next ) {
205 debug("Found attachment #". $a->id );
206 my $text = $a->Content // "";
207 HTML::Entities::decode_entities($text) if $a->ContentType eq "text/html";
208 push @insert, $text, $a->id;
211 return unless $found;
213 # $found should be the limit size on all but the last go-around.
214 $sthandles{$found} ||= $dbh->prepare($statement->($found));
216 return if eval { $sthandles{$found}->execute(@insert); };
218 # We can catch and recover from some errors; re-do row-by-row to
219 # know which row had which errors
221 my ($content, $id) = splice(@insert,0,2);
222 next if eval { $sthandles{1}->execute($content, $id); };
223 $error->($id, $content);
225 # If this was a semi-expected error, insert an empty
226 # tsvector, so we count this row as "indexed" for
227 # purposes of knowing where to pick up
228 eval { $sthandles{1}->execute( "", $id ) }
229 or die "Failed to insert empty row for attachment $id: " . $dbh->errstr;
235 my $dbh = $RT::Handle->dbh;
236 my $table = $fts_config->{'Table'};
238 ($LAST) = $dbh->selectrow_array("SELECT MAX(id) FROM $table");
240 my $insert = $fts_config->{Engine} eq "MyISAM" ? "INSERT DELAYED" : "INSERT";
245 return "$insert INTO $table(Content, id) VALUES "
246 . join(", ", ("(?,?)") x $n);
250 if ($dbh->err == 1366 and $dbh->state eq "HY000") {
251 warn "Attachment $id cannot be indexed. Most probably it contains invalid UTF8 bytes. ".
252 "Error: ". $dbh->errstr;
254 die "Attachment $id cannot be indexed: " . $dbh->errstr;
262 if ( $fts_config->{'Table'} ne 'Attachments' ) {
269 sub process_pg_insert {
270 my $dbh = $RT::Handle->dbh;
271 my $table = $fts_config->{'Table'};
272 my $column = $fts_config->{'Column'};
273 ($LAST) = $dbh->selectrow_array("SELECT MAX(id) FROM $table");
278 return "INSERT INTO $table($column, id) VALUES "
279 . join(", ", ("(TO_TSVECTOR(?),?)") x $n);
283 if ( $dbh->err == 7 && $dbh->state eq '54000' ) {
284 warn "Attachment $id cannot be indexed. Most probably it contains too many unique words. ".
285 "Error: ". $dbh->errstr;
286 } elsif ( $dbh->err == 7 && $dbh->state eq '22021' ) {
287 warn "Attachment $id cannot be indexed. Most probably it contains invalid UTF8 bytes. ".
288 "Error: ". $dbh->errstr;
290 die "Attachment $id cannot be indexed: " . $dbh->errstr;
296 sub process_pg_update {
297 my $dbh = $RT::Handle->dbh;
298 my $column = $fts_config->{'Column'};
300 ($LAST) = $dbh->selectrow_array("SELECT MAX(id) FROM Attachments WHERE $column IS NOT NULL");
302 my $sth = $dbh->prepare("UPDATE Attachments SET $column = TO_TSVECTOR(?) WHERE id = ?");
304 attachment_loop( sub {
305 my ($attachments) = @_;
307 while ( my $a = $attachments->Next ) {
308 debug("Found attachment #". $a->id );
310 my $text = $a->Content // "";
311 HTML::Entities::decode_entities($text) if $a->ContentType eq "text/html";
313 push @insert, [$text, $a->id];
316 # Try in one database transaction; if it fails, we roll it back
317 # and try one statement at a time.
321 $ok = eval { $sth->execute( $_->[0], $_->[1] ) };
330 # Things didn't go well. Retry the UPDATE statements one row at
331 # a time, outside of the transaction.
333 my ($content, $id) = ($_->[0], $_->[1]);
334 next if eval { $sth->execute( $content, $id ) };
335 if ( $dbh->err == 7 && $dbh->state eq '54000' ) {
336 warn "Attachment $id cannot be indexed. Most probably it contains too many unique words. ".
337 "Error: ". $dbh->errstr;
338 } elsif ( $dbh->err == 7 && $dbh->state eq '22021' ) {
339 warn "Attachment $id cannot be indexed. Most probably it contains invalid UTF8 bytes. ".
340 "Error: ". $dbh->errstr;
342 die "Attachment $id cannot be indexed: " . $dbh->errstr;
345 # If this was a semi-expected error, insert an empty
346 # tsvector, so we count this row as "indexed" for
347 # purposes of knowing where to pick up
348 eval { $sth->execute( "", $id ) }
349 or die "Failed to insert empty row for attachment $id: " . $dbh->errstr;
356 sub debug { print @_, "\n" if $OPT{debug}; 1 }
357 sub error { $RT::Logger->error(_(@_)); 1 }
358 sub warning { $RT::Logger->warn(_(@_)); 1 }
362 rt-fulltext-indexer - Indexer for full text search
366 This is a helper script to keep full text indexes in sync with data.
367 Read F<docs/full_text_indexing.pod> for complete details on how and when