diff options
Diffstat (limited to 'rt/sbin/rt-setup-fulltext-index')
-rwxr-xr-x | rt/sbin/rt-setup-fulltext-index | 720 |
1 files changed, 0 insertions, 720 deletions
diff --git a/rt/sbin/rt-setup-fulltext-index b/rt/sbin/rt-setup-fulltext-index deleted file mode 100755 index e27a27010..000000000 --- a/rt/sbin/rt-setup-fulltext-index +++ /dev/null @@ -1,720 +0,0 @@ -#!/usr/bin/perl -# BEGIN BPS TAGGED BLOCK {{{ -# -# COPYRIGHT: -# -# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC -# <sales@bestpractical.com> -# -# (Except where explicitly superseded by other copyright notices) -# -# -# LICENSE: -# -# This work is made available to you under the terms of Version 2 of -# the GNU General Public License. A copy of that license should have -# been provided with this software, but in any event can be snarfed -# from www.gnu.org. -# -# This work is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301 or visit their web page on the internet at -# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html. -# -# -# CONTRIBUTION SUBMISSION POLICY: -# -# (The following paragraph is not intended to limit the rights granted -# to you to modify and distribute this software under the terms of -# the GNU General Public License and is only of importance to you if -# you choose to contribute your changes and enhancements to the -# community by submitting them to Best Practical Solutions, LLC.) -# -# By intentionally submitting any modifications, corrections or -# derivatives to this work, or any other work intended for use with -# Request Tracker, to Best Practical Solutions, LLC, you confirm that -# you are the copyright holder for those contributions and you grant -# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable, -# royalty-free, perpetual, license to use, copy, create derivative -# works based on those contributions, and sublicense and distribute -# those contributions and any derivatives thereof. -# -# END BPS TAGGED BLOCK }}} -use strict; -use warnings; -no warnings 'once'; - -# fix lib paths, some may be relative -BEGIN { - require File::Spec; - my @libs = ("/opt/rt3/lib", "/opt/rt3/local/lib"); - my $bin_path; - - for my $lib (@libs) { - unless ( File::Spec->file_name_is_absolute($lib) ) { - unless ($bin_path) { - if ( File::Spec->file_name_is_absolute(__FILE__) ) { - $bin_path = ( File::Spec->splitpath(__FILE__) )[1]; - } - else { - require FindBin; - no warnings "once"; - $bin_path = $FindBin::Bin; - } - } - $lib = File::Spec->catfile( $bin_path, File::Spec->updir, $lib ); - } - unshift @INC, $lib; - } -} - -BEGIN { - use RT; - RT::LoadConfig(); - RT::Init(); -}; -use RT::Interface::CLI (); - -my %DB = ( - type => scalar RT->Config->Get('DatabaseType'), - user => scalar RT->Config->Get('DatabaseUser'), - admin => 'freeside', - admin_password => undef, -); - -my %OPT = ( - help => 0, - ask => 1, - dryrun => 0, - attachments => 1, -); - -my %DEFAULT; -if ( $DB{'type'} eq 'Pg' ) { - %DEFAULT = ( - table => 'Attachments', - column => 'ContentIndex', - ); -} -elsif ( $DB{'type'} eq 'mysql' ) { - %DEFAULT = ( - table => 'AttachmentsIndex', - ); -} -elsif ( $DB{'type'} eq 'Oracle' ) { - %DEFAULT = ( - prefix => 'rt_fts_', - ); -} - -use Getopt::Long qw(GetOptions); -GetOptions( - 'h|help!' => \$OPT{'help'}, - 'ask!' => \$OPT{'ask'}, - 'dry-run!' => \$OPT{'dryrun'}, - 'attachments!' => \$OPT{'attachments'}, - - 'table=s' => \$OPT{'table'}, - 'column=s' => \$OPT{'column'}, - 'url=s' => \$OPT{'url'}, - 'maxmatches=i' => \$OPT{'maxmatches'}, - 'index-type=s' => \$OPT{'index-type'}, - - 'dba=s' => \$DB{'admin'}, - 'dba-password=s' => \$DB{'admin_password'}, -) or show_help(); - -if ( $OPT{'help'} || (!$DB{'admin'} && $DB{'type'} eq 'Oracle' ) ) { - show_help( !$OPT{'help'} ); -} - -my $dbh = $RT::Handle->dbh; -$dbh->{'RaiseError'} = 1; -$dbh->{'PrintError'} = 1; - -if ( $DB{'type'} eq 'mysql' ) { - check_sphinx(); - my $table = $OPT{'table'} || prompt( - message => "Enter name of a new MySQL table that will be used to connect to the\n" - . "Sphinx server:", - default => $DEFAULT{'table'}, - silent => !$OPT{'ask'}, - ); - - my $url = 'sphinx://localhost:3312/rt'; - my $version = ($dbh->selectrow_array("show variables like 'version'"))[1]; - $url = 'sphinx://127.0.0.1:3312/rt' - if $version and $version =~ /^(\d+\.\d+)/ and $1 >= 5.5; - - $url = $OPT{'url'} || prompt( - message => "Enter URL of the sphinx search server; this should be of the form\n" - . "sphinx://<server>:<port>/<index name>", - default => $url, - silent => !$OPT{'ask'}, - ); - my $maxmatches = $OPT{'maxmatches'} || prompt( - message => "Maximum number of matches to return; this is the maximum number of\n" - . "attachment records returned by the search, not the maximum number\n" - . "of tickets. Both your RT_SiteConfig.pm and your sphinx.conf must\n" - . "agree on this value. Larger values cause your Sphinx server to\n" - . "consume more memory and CPU time per query.", - default => 10000, - silent => !$OPT{'ask'}, - ); - - my $schema = <<END; -CREATE TABLE $table ( - id INTEGER UNSIGNED NOT NULL, - weight INTEGER NOT NULL, - query VARCHAR(3072) NOT NULL, - INDEX(query) -) ENGINE=SPHINX CONNECTION="$url" CHARACTER SET utf8 -END - - do_error_is_ok( dba_handle() => "DROP TABLE $table" ) - unless $OPT{'dryrun'}; - insert_schema( $schema ); - - print_rt_config( Table => $table, MaxMatches => $maxmatches ); - - require URI; - my $urlo = URI->new( $url ); - my ($host, $port) = split /:/, $urlo->authority; - my $index = $urlo->path; - $index =~ s{^/+}{}; - - my $var_path = $RT::VarPath; - - my %sphinx_conf = (); - $sphinx_conf{'host'} = RT->Config->Get('DatabaseHost'); - $sphinx_conf{'db'} = RT->Config->Get('DatabaseName'); - $sphinx_conf{'user'} = RT->Config->Get('DatabaseUser'); - $sphinx_conf{'pass'} = RT->Config->Get('DatabasePassword'); - - print <<END - -Below is a simple Sphinx configuration which can be used to index all -text/plain attachments in your database. This configuration is not -ideal; you should read the Sphinx documentation to understand how to -configure it to better suit your needs. - -source rt { - type = mysql - - sql_host = $sphinx_conf{'host'} - sql_db = $sphinx_conf{'db'} - sql_user = $sphinx_conf{'user'} - sql_pass = $sphinx_conf{'pass'} - - sql_query_pre = SET NAMES utf8 - sql_query = \\ - SELECT a.id, a.content FROM Attachments a \\ - JOIN Transactions txn ON a.TransactionId = txn.id AND txn.ObjectType = 'RT::Ticket' \\ - JOIN Tickets t ON txn.ObjectId = t.id \\ - WHERE a.ContentType = 'text/plain' AND t.Status != 'deleted' - - sql_query_info = SELECT * FROM Attachments WHERE id=\$id -} - -index $index { - source = rt - path = $var_path/sphinx/index - docinfo = extern - charset_type = utf-8 -} - -indexer { - mem_limit = 32M -} - -searchd { - port = $port - log = $var_path/sphinx/searchd.log - query_log = $var_path/sphinx/query.log - read_timeout = 5 - max_children = 30 - pid_file = $var_path/sphinx/searchd.pid - max_matches = $maxmatches - seamless_rotate = 1 - preopen_indexes = 0 - unlink_old = 1 -} - -END - -} -elsif ( $DB{'type'} eq 'Pg' ) { - check_tsvalue(); - my $table = $OPT{'table'} || prompt( - message => "Enter the name of a DB table that will be used to store the Pg tsvector.\n" - . "You may either use the existing Attachments table, or create a new\n" - . "table.", - default => $DEFAULT{'table'}, - silent => !$OPT{'ask'}, - ); - my $column = $OPT{'column'} || prompt( - message => 'Enter the name of a column that will be used to store the Pg tsvector:', - default => $DEFAULT{'column'}, - silent => !$OPT{'ask'}, - ); - - my $schema; - my $drop; - if ( lc($table) eq 'attachments' ) { - $drop = "ALTER TABLE $table DROP COLUMN $column"; - $schema = "ALTER TABLE $table ADD COLUMN $column tsvector"; - } else { - $drop = "DROP TABLE $table"; - $schema = "CREATE TABLE $table ( " - ."id INTEGER NOT NULL," - ."$column tsvector )"; - } - - my $index_type = lc($OPT{'index-type'} || ''); - while ( $index_type ne 'gist' and $index_type ne 'gin' ) { - $index_type = lc prompt( - message => "You may choose between GiST or GIN indexes; the former is several times\n" - . "slower to search, but takes less space on disk and is faster to update.", - default => 'GiST', - silent => !$OPT{'ask'}, - ); - } - - do_error_is_ok( dba_handle() => $drop ) - unless $OPT{'dryrun'}; - insert_schema( $schema ); - insert_schema("CREATE INDEX ${column}_idx ON $table USING $index_type($column)"); - - print_rt_config( Table => $table, Column => $column ); -} -elsif ( $DB{'type'} eq 'Oracle' ) { - { - my $dbah = dba_handle(); - do_print_error( $dbah => 'GRANT CTXAPP TO '. $DB{'user'} ); - do_print_error( $dbah => 'GRANT EXECUTE ON CTXSYS.CTX_DDL TO '. $DB{'user'} ); - } - - my %PREFERENCES = ( - datastore => { - type => 'DIRECT_DATASTORE', - }, - filter => { - type => 'AUTO_FILTER', -# attributes => { -# timeout => 120, # seconds -# timeout_type => 'HEURISTIC', # or 'FIXED' -# }, - }, - lexer => { - type => 'WORLD_LEXER', - }, - word_list => { - type => 'BASIC_WORDLIST', - attributes => { - stemmer => 'AUTO', - fuzzy_match => 'AUTO', -# fuzzy_score => undef, -# fuzzy_numresults => undef, -# substring_index => undef, -# prefix_index => undef, -# prefix_length_min => undef, -# prefix_length_max => undef, -# wlidcard_maxterms => undef, - }, - }, - 'section_group' => { - type => 'NULL_SECTION_GROUP', - }, - - storage => { - type => 'BASIC_STORAGE', - attributes => { - R_TABLE_CLAUSE => 'lob (data) store as (cache)', - I_INDEX_CLAUSE => 'compress 2', - }, - }, - ); - - my @params = (); - push @params, ora_create_datastore( %{ $PREFERENCES{'datastore'} } ); - push @params, ora_create_filter( %{ $PREFERENCES{'filter'} } ); - push @params, ora_create_lexer( %{ $PREFERENCES{'lexer'} } ); - push @params, ora_create_word_list( %{ $PREFERENCES{'word_list'} } ); - push @params, ora_create_stop_list(); - push @params, ora_create_section_group( %{ $PREFERENCES{'section_group'} } ); - push @params, ora_create_storage( %{ $PREFERENCES{'storage'} } ); - - my $index_params = join "\n", @params; - my $index_name = $DEFAULT{prefix} .'index'; - do_error_is_ok( $dbh => "DROP INDEX $index_name" ) - unless $OPT{'dryrun'}; - $dbh->do( - "CREATE INDEX $index_name ON Attachments(Content) - indextype is ctxsys.context parameters(' - $index_params - ')", - ) unless $OPT{'dryrun'}; - - print_rt_config( IndexName => $index_name ); -} -else { - die "Full-text indexes on $DB{type} are not yet supported"; -} - -sub check_tsvalue { - my $dbh = $RT::Handle->dbh; - my $fts = ($dbh->selectrow_array(<<EOQ))[0]; -SELECT 1 FROM information_schema.routines WHERE routine_name = 'plainto_tsquery' -EOQ - unless ($fts) { - print STDERR <<EOT; - -Your PostgreSQL server does not include full-text support. You will -need to upgrade to PostgreSQL version 8.3 or higher to use full-text -indexing. - -EOT - exit 1; - } -} - -sub check_sphinx { - return if $RT::Handle->CheckSphinxSE; - - print STDERR <<EOT; - -Your MySQL server has not been compiled with the Sphinx storage engine -(sphinxse). You will need to recompile MySQL according to the -instructions in Sphinx's documentation at -http://sphinxsearch.com/docs/current.html#sphinxse-installing - -EOT - exit 1; -} - -sub ora_create_datastore { - return sprintf 'datastore %s', ora_create_preference( - @_, - name => 'datastore', - ); -} - -sub ora_create_filter { - my $res = ''; - $res .= sprintf "format column %s\n", ora_create_format_column(); - $res .= sprintf 'filter %s', ora_create_preference( - @_, - name => 'filter', - ); - return $res; -} - -sub ora_create_lexer { - return sprintf 'lexer %s', ora_create_preference( - @_, - name => 'lexer', - ); -} - -sub ora_create_word_list { - return sprintf 'wordlist %s', ora_create_preference( - @_, - name => 'word_list', - ); -} - -sub ora_create_stop_list { - my $file = shift || 'etc/stopwords/en.txt'; - return '' unless -e $file; - - my $name = $DEFAULT{'prefix'} .'stop_list'; - unless ($OPT{'dryrun'}) { - do_error_is_ok( $dbh => 'begin ctx_ddl.drop_stoplist(?); end;', $name ); - - $dbh->do( - 'begin ctx_ddl.create_stoplist(?, ?); end;', - undef, $name, 'BASIC_STOPLIST' - ); - - open( my $fh, '<:utf8', $file ) - or die "couldn't open file '$file': $!"; - while ( my $word = <$fh> ) { - chomp $word; - $dbh->do( - 'begin ctx_ddl.add_stopword(?, ?); end;', - undef, $name, $word - ); - } - close $fh; - } - return sprintf 'stoplist %s', $name; -} - -sub ora_create_section_group { - my %args = @_; - my $name = $DEFAULT{'prefix'} .'section_group'; - unless ($OPT{'dryrun'}) { - do_error_is_ok( $dbh => 'begin ctx_ddl.drop_section_group(?); end;', $name ); - $dbh->do( - 'begin ctx_ddl.create_section_group(?, ?); end;', - undef, $name, $args{'type'} - ); - } - return sprintf 'section group %s', $name; -} - -sub ora_create_storage { - return sprintf 'storage %s', ora_create_preference( - @_, - name => 'storage', - ); -} - -sub ora_create_format_column { - my $column_name = 'ContentOracleFormat'; - return $column_name if $OPT{'dryrun'}; - unless ( - $dbh->column_info( - undef, undef, uc('Attachments'), uc( $column_name ) - )->fetchrow_array - ) { - $dbh->do(qq{ - ALTER TABLE Attachments ADD $column_name VARCHAR2(10) - }); - } - - my $detect_format = qq{ - CREATE OR REPLACE FUNCTION $DEFAULT{prefix}detect_format_simple( - parent IN NUMBER, - type IN VARCHAR2, - encoding IN VARCHAR2, - fname IN VARCHAR2 - ) - RETURN VARCHAR2 - AS - format VARCHAR2(10); - BEGIN - format := CASE - }; - unless ( $OPT{'attachments'} ) { - $detect_format .= qq{ - WHEN fname IS NOT NULL THEN 'ignore' - }; - } - $detect_format .= qq{ - WHEN type = 'text' THEN 'text' - WHEN type = 'text/rtf' THEN 'ignore' - WHEN type LIKE 'text/%' THEN 'text' - WHEN type LIKE 'message/%' THEN 'text' - ELSE 'ignore' - END; - RETURN format; - END; - }; - ora_create_procedure( $detect_format ); - - $dbh->do(qq{ - UPDATE Attachments - SET $column_name = $DEFAULT{prefix}detect_format_simple( - Parent, - ContentType, ContentEncoding, - Filename - ) - WHERE $column_name IS NULL - }); - $dbh->do(qq{ - CREATE OR REPLACE TRIGGER $DEFAULT{prefix}set_format - BEFORE INSERT - ON Attachments - FOR EACH ROW - BEGIN - :new.$column_name := $DEFAULT{prefix}detect_format_simple( - :new.Parent, - :new.ContentType, :new.ContentEncoding, - :new.Filename - ); - END; - }); - return $column_name; -} - -sub ora_create_preference { - my %info = @_; - my $name = $DEFAULT{'prefix'} . $info{'name'}; - return $name if $OPT{'dryrun'}; - do_error_is_ok( $dbh => 'begin ctx_ddl.drop_preference(?); end;', $name ); - $dbh->do( - 'begin ctx_ddl.create_preference(?, ?); end;', - undef, $name, $info{'type'} - ); - return $name unless $info{'attributes'}; - - while ( my ($attr, $value) = each %{ $info{'attributes'} } ) { - $dbh->do( - 'begin ctx_ddl.set_attribute(?, ?, ?); end;', - undef, $name, $attr, $value - ); - } - - return $name; -} - -sub ora_create_procedure { - my $text = shift; - - return if $OPT{'dryrun'}; - my $status = $dbh->do($text, { RaiseError => 0 }); - - # Statement succeeded - return if $status; - - if ( 6550 != $dbh->err ) { - # Utter failure - die $dbh->errstr; - } - else { - my $msg = $dbh->func( 'plsql_errstr' ); - die $dbh->errstr if !defined $msg; - die $msg if $msg; - } -} - -sub dba_handle { - if ( $DB{'type'} eq 'Oracle' ) { - $ENV{'NLS_LANG'} = "AMERICAN_AMERICA.AL32UTF8"; - $ENV{'NLS_NCHAR'} = "AL32UTF8"; - } - my $dsn = do { my $h = new RT::Handle; $h->BuildDSN; $h->DSN }; - my $dbh = DBI->connect( - $dsn, $DB{admin}, $DB{admin_password}, - { RaiseError => 1, PrintError => 1 }, - ); - unless ( $dbh ) { - die "Failed to connect to $dsn as user '$DB{admin}': ". $DBI::errstr; - } - return $dbh; -} - -sub do_error_is_ok { - my $dbh = shift; - local $dbh->{'RaiseError'} = 0; - local $dbh->{'PrintError'} = 0; - return $dbh->do(shift, undef, @_); -} - -sub do_print_error { - my $dbh = shift; - local $dbh->{'RaiseError'} = 0; - local $dbh->{'PrintError'} = 1; - return $dbh->do(shift, undef, @_); -} - -sub prompt { - my %args = ( @_ ); - return $args{'default'} if $args{'silent'}; - - local $| = 1; - print $args{'message'}; - if ( $args{'default'} ) { - print "\n[". $args{'default'} .']: '; - } else { - print ":\n"; - } - - my $res = <STDIN>; - chomp $res; - print "\n"; - return $args{'default'} if !$res && $args{'default'}; - return $res; -} - -sub verbose { print @_, "\n" if $OPT{verbose} || $OPT{verbose}; 1 } -sub debug { print @_, "\n" if $OPT{debug}; 1 } -sub error { $RT::Logger->error( @_ ); verbose(@_); 1 } -sub warning { $RT::Logger->warning( @_ ); verbose(@_); 1 } - -sub show_help { - my $error = shift; - RT::Interface::CLI->ShowHelp( - ExitValue => $error, - Sections => 'NAME|DESCRIPTION', - ); -} - -sub print_rt_config { - my %args = @_; - my $config = <<END; - -You can now configure RT to use the newly-created full-text index by -adding the following to your RT_SiteConfig.pm: - -Set( %FullTextSearch, - Enable => 1, - Indexed => 1, -END - - $config .= sprintf(" %-10s => '$args{$_}',\n",$_) - foreach grep defined $args{$_}, keys %args; - $config .= ");\n"; - - print $config; -} - -sub insert_schema { - my $dbh = dba_handle(); - my $message = "Going to run the following in the DB:"; - my $schema = shift; - print "$message\n"; - my $disp = $schema; - $disp =~ s/^/ /mg; - print "$disp\n\n"; - return if $OPT{'dryrun'}; - - my $res = $dbh->do( $schema ); - unless ( $res ) { - die "Couldn't run DDL query: ". $dbh->errstr; - } -} - -=head1 NAME - -rt-setup-fulltext-index - Create indexes for full text search - -=head1 DESCRIPTION - -This script creates the appropriate tables, columns, functions, and / or -views necessary for full-text searching for your database type. It will -drop any existing indexes in the process. - -Please read F<docs/full_text_indexing.pod> for complete documentation on -full-text indexing for your database type. - -If you have a non-standard database administrator user or password, you -may use the C<--dba> and C<--dba-password> parameters to set them -explicitly: - - rt-setup-fulltext-index --dba sysdba --dba-password 'secret' - -To test what will happen without running any DDL, pass the C<--dryrun> -flag. - -The Oracle index determines which content-types it will index at -creation time. By default, textual message bodies and textual uploaded -attachments (attachments with filenames) are indexed; to ignore textual -attachments, pass the C<--no-attachments> flag when the index is -created. - - -=head1 AUTHOR - -Ruslan Zakirov E<lt>ruz@bestpractical.comE<gt>, -Alex Vandiver E<lt>alexmv@bestpractical.comE<gt> - -=cut - |