diff options
Diffstat (limited to 'rt/sbin/rt-serializer.in')
-rw-r--r-- | rt/sbin/rt-serializer.in | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/rt/sbin/rt-serializer.in b/rt/sbin/rt-serializer.in new file mode 100644 index 0000000..0e01744 --- /dev/null +++ b/rt/sbin/rt-serializer.in @@ -0,0 +1,399 @@ +#!@PERL@ +# BEGIN BPS TAGGED BLOCK {{{ +# +# COPYRIGHT: +# +# This software is Copyright (c) 1996-2015 Best Practical Solutions, LLC +# <sales@bestpractical.com> +# +# (Except where explicitly superseded by other copyright notices) +# +# +# LICENSE: +# +# This work is made available to you under the terms of Version 2 of +# the GNU General Public License. A copy of that license should have +# been provided with this software, but in any event can be snarfed +# from www.gnu.org. +# +# This work is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301 or visit their web page on the internet at +# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html. +# +# +# CONTRIBUTION SUBMISSION POLICY: +# +# (The following paragraph is not intended to limit the rights granted +# to you to modify and distribute this software under the terms of +# the GNU General Public License and is only of importance to you if +# you choose to contribute your changes and enhancements to the +# community by submitting them to Best Practical Solutions, LLC.) +# +# By intentionally submitting any modifications, corrections or +# derivatives to this work, or any other work intended for use with +# Request Tracker, to Best Practical Solutions, LLC, you confirm that +# you are the copyright holder for those contributions and you grant +# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable, +# royalty-free, perpetual, license to use, copy, create derivative +# works based on those contributions, and sublicense and distribute +# those contributions and any derivatives thereof. +# +# END BPS TAGGED BLOCK }}} +use strict; +use warnings; + +# fix lib paths, some may be relative +BEGIN { + require File::Spec; + my @libs = ("@RT_LIB_PATH@", "@LOCAL_LIB_PATH@"); + my $bin_path; + + for my $lib (@libs) { + unless ( File::Spec->file_name_is_absolute($lib) ) { + unless ($bin_path) { + if ( File::Spec->file_name_is_absolute(__FILE__) ) { + $bin_path = ( File::Spec->splitpath(__FILE__) )[1]; + } + else { + require FindBin; + no warnings "once"; + $bin_path = $FindBin::Bin; + } + } + $lib = File::Spec->catfile( $bin_path, File::Spec->updir, $lib ); + } + unshift @INC, $lib; + } + +} + +use RT; +RT::LoadConfig(); +RT::Init(); + +@RT::Record::ISA = qw( DBIx::SearchBuilder::Record RT::Base ); + +use RT::Migrate; +use RT::Migrate::Serializer::File; +use Getopt::Long; +use Pod::Usage qw//; +use Time::HiRes qw//; + +my %OPT; +GetOptions( + \%OPT, + "help|?", + "verbose|v!", + "quiet|q!", + + "directory|d=s", + "force|f!", + "size|s=i", + + "users!", + "groups!", + "deleted!", + + "scrips!", + "tickets!", + "acls!", + + "clone", + "incremental", + + "gc=i", + "page=i", +) or Pod::Usage::pod2usage(); + +Pod::Usage::pod2usage(-verbose => 1) if $OPT{help}; + +my %args; +$args{Directory} = $OPT{directory}; +$args{Force} = $OPT{force}; +$args{MaxFileSize} = $OPT{size} if $OPT{size}; + +$args{AllUsers} = $OPT{users} if defined $OPT{users}; +$args{AllGroups} = $OPT{groups} if defined $OPT{groups}; +$args{FollowDeleted} = $OPT{deleted} if defined $OPT{deleted}; + +$args{FollowScrips} = $OPT{scrips} if defined $OPT{scrips}; +$args{FollowTickets} = $OPT{tickets} if defined $OPT{tickets}; +$args{FollowACL} = $OPT{acls} if defined $OPT{acls}; + +$args{Clone} = $OPT{clone} if $OPT{clone}; +$args{Incremental} = $OPT{incremental} if $OPT{incremental}; + +$args{GC} = defined $OPT{gc} ? $OPT{gc} : 5000; +$args{Page} = defined $OPT{page} ? $OPT{page} : 100; + +if (($OPT{clone} or $OPT{incremental}) + and grep { /^(users|groups|deleted|scrips|tickets|acls)$/ } keys %OPT) { + die "You cannot specify object types when cloning.\n\nPlease see $0 --help.\n"; +} + +my $walker; + +my $gnuplot = `which gnuplot`; +my $msg = ""; +if (-t STDOUT and not $OPT{verbose} and not $OPT{quiet}) { + $args{Progress} = RT::Migrate::progress( + top => \&gnuplot, + bottom => sub { print "\n$msg"; $msg = ""; }, + counts => sub { $walker->ObjectCount }, + max => { estimate() }, + ); + $args{MessageHandler} = sub { + print "\r", " "x60, "\r", $_[-1]; $msg = $_[-1]; + }; + $args{Verbose} = 0; +} +$args{Verbose} = 0 if $OPT{quiet}; + + +$walker = RT::Migrate::Serializer::File->new( %args ); + +my $log = RT::Migrate::setup_logging( $walker->{Directory} => 'serializer.log' ); +print "Logging warnings and errors to $log\n" if $log; + +print "Beginning database serialization..."; +my %counts = $walker->Export; + +my @files = $walker->Files; +print "Wrote @{[scalar @files]} files:\n"; +print " $_\n" for @files; +print "\n"; + +print "Total object counts:\n"; +for (sort {$counts{$b} <=> $counts{$a}} keys %counts) { + printf "%8d %s\n", $counts{$_}, $_; +} + +if ($log and -s $log) { + print STDERR "\n! Some warnings or errors occurred during serialization." + ."\n! Please see $log for details.\n\n"; +} else { + unlink $log; +} + +sub estimate { + $| = 1; + my %e; + + # Expected types we'll serialize + my @types = map {"RT::$_"} qw/ + Queue Ticket Transaction Attachment Link + User Group GroupMember Attribute + CustomField CustomFieldValue + ObjectCustomField ObjectCustomFieldValue + /; + + for my $class (@types) { + print "Estimating $class count..."; + my $collection = $class . "s"; + if ($collection->require) { + my $objs = $collection->new( RT->SystemUser ); + $objs->FindAllRows; + $objs->UnLimit; + $objs->{allow_deleted_search} = 1 if $class eq "RT::Ticket"; + $e{$class} = $objs->DBIx::SearchBuilder::Count; + } + print "\r", " "x60, "\r"; + } + + return %e; +} + + +sub gnuplot { + my ($elapsed, $rows, $cols) = @_; + my $length = $walker->StackSize; + my $file = $walker->Directory . "/progress.plot"; + open(my $dat, ">>", $file); + printf $dat "%10.3f\t%8d\n", $elapsed, $length; + close $dat; + + if ($rows <= 24 or not $gnuplot) { + print "\n\n"; + } elsif ($elapsed) { + my $gnuplot = qx| + gnuplot -e ' + set term dumb $cols @{[$rows - 12]}; + set xlabel "Seconds"; + unset key; + set xrange [0:*]; + set yrange [0:*]; + set title "Queue length"; + plot "$file" using 1:2 with lines + ' + |; + if ($? == 0 and $gnuplot) { + $gnuplot =~ s/^(\s*\n)//; + print $gnuplot; + unlink $file; + } else { + warn "Couldn't run gnuplot (\$? == $?): $!\n"; + } + } else { + print "\n" for 1..($rows - 13); + } +} + +=head1 NAME + +rt-serializer - Serialize an RT database to disk + +=head1 SYNOPSIS + + rt-validator --check && rt-serializer + +This script is used to write out the entire RT database to disk, for +later import into a different RT instance. It requires that the data in +the database be self-consistent, in order to do so; please make sure +that the database being exported passes validation by L<rt-validator> +before attempting to use C<rt-serializer>. + +While running, it will attempt to estimate the number of remaining +objects to be serialized; these estimates are pessimistic, and will be +incorrect if C<--no-users>, C<--no-groups>, or C<--no-tickets> are used. + +If the controlling terminal is large enough (more than 25 columns high) +and the C<gnuplot> program is installed, it will also show a textual +graph of the queue size over time. + +=head2 OPTIONS + +=over + +=item B<--directory> I<name> + +The name of the output directory to write data files to, which should +not exist yet; it is a fatal error if it does. Defaults to +C<< ./I<$Organization>:I<Date>/ >>, where I<$Organization> is as set in +F<RT_SiteConfig.pm>, and I<Date> is today's date. + +=item B<--force> + +Remove the output directory before starting. + +=item B<--size> I<megabytes> + +By default, C<rt-serializer> chunks its output into data files which are +around 32Mb in size; this option is used to set a different threshold +size, in megabytes. Note that this is the threshold after which it +rotates to writing a new file, and is as such the I<lower bound> on the +size of each output file. + +=item B<--no-users> + +By default, all privileged users are serialized; passing C<--no-users> +limits it to only those users which are referenced by serialized tickets +and history, and are thus necessary for internal consistency. + +=item B<--no-groups> + +By default, all groups are serialized; passing C<--no-groups> limits it +to only system-internal groups, which are needed for internal +consistency. + +=item B<--no-deleted> + +By default, all tickets, including deleted tickets, are serialized; +passing C<--no-deleted> skips deleted tickets during serialization. + +=item B<--scrips> + +No scrips or templates are serialized by default; this option forces all +scrips and templates to be serialized. + +=item B<--acls> + +No ACLs are serialized by default; this option forces all ACLs to be +serialized. + +=item B<--no-tickets> + +Skip serialization of all ticket data. + +=item B<--clone> + +Serializes your entire database, creating a clone. This option should +be used if you want to migrate your RT database from one database type +to another (e.g. MySQL to Postgres). It is an error to combine +C<--clone> with any option that limits object types serialized. No +dependency walking is performed when cloning. C<rt-importer> will detect +that your serialized data set was generated by a clone. + +=item B<--incremental> + +Will generate an incremenal serialized dataset using the data stored in +your IncrementalRecords database table. This assumes that you have created +that table and run RT using the Record_Local.pm shim as documented in +C<docs/incremental-export/>. + +=item B<--gc> I<n> + +Adjust how often the garbage collection sweep is done; lower numbers are +more frequent. See L</GARBAGE COLLECTION>. + +=item B<--page> I<n> + +Adjust how many rows are pulled from the database in a single query. Disable +paging by setting this to 0. Defaults to 100. + +Keep in mind that rows from RT's Attachments table are the limiting factor when +determining page size. You should likely be aiming for 60-75% of your total +memory on an otherwise unloaded box. + +=item B<--quiet> + +Do not show graphical progress UI. + +=item B<--verbose> + +Do not show graphical progress UI, but rather log was each row is +written out. + +=back + +=head1 GARBAGE COLLECTION + +C<rt-serializer> maintains a priority queue of objects to serialize, or +searches which may result in objects to serialize. When inserting into +this queue, it does no checking if the object in question is already in +the queue, or if the search will contain any results. These checks are +done when the object reaches the front of the queue, or during periodic +garbage collection. + +During periodic garbage collection, the entire queue is swept for +objects which have already been serialized, occur more than once in the +queue, and searches which contain no results in the database. This is +done to reduce the memory footprint of the serialization process, and is +triggered when enough new objects have been placed in the queue. This +parameter is tunable via the C<--gc> parameter, which defaults to +running garbage collection every 5,000 objects inserted into the queue; +smaller numbers will result in more frequent garbage collection. + +The default of 5,000 is roughly tuned based on a database with several +thousand tickets, but optimal values will vary wildly depending on +database configuration and size. Values as low as 25 have provided +speedups with smaller databases; if speed is a factor, experimenting +with different C<--gc> values may be helpful. Note that there are +significant boundary condition changes in serialization rate, as the +queue empties and fills, causing the time estimates to be rather +imprecise near the start and end of the process. + +Setting C<--gc> to 0 turns off all garbage collection. Be aware that +this will bloat the memory usage of the serializer. Any negative value +for C<--gc> turns off periodic garbage collection and instead objects +already serialized or in the queue are checked for at the time they +would be inserted. + +=cut + |