2 # BEGIN BPS TAGGED BLOCK {{{
6 # This software is Copyright (c) 1996-2017 Best Practical Solutions, LLC
7 # <sales@bestpractical.com>
9 # (Except where explicitly superseded by other copyright notices)
14 # This work is made available to you under the terms of Version 2 of
15 # the GNU General Public License. A copy of that license should have
16 # been provided with this software, but in any event can be snarfed
19 # This work is distributed in the hope that it will be useful, but
20 # WITHOUT ANY WARRANTY; without even the implied warranty of
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 # General Public License for more details.
24 # You should have received a copy of the GNU General Public License
25 # along with this program; if not, write to the Free Software
26 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 # 02110-1301 or visit their web page on the internet at
28 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
31 # CONTRIBUTION SUBMISSION POLICY:
33 # (The following paragraph is not intended to limit the rights granted
34 # to you to modify and distribute this software under the terms of
35 # the GNU General Public License and is only of importance to you if
36 # you choose to contribute your changes and enhancements to the
37 # community by submitting them to Best Practical Solutions, LLC.)
39 # By intentionally submitting any modifications, corrections or
40 # derivatives to this work, or any other work intended for use with
41 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
42 # you are the copyright holder for those contributions and you grant
43 # Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
44 # royalty-free, perpetual, license to use, copy, create derivative
45 # works based on those contributions, and sublicense and distribute
46 # those contributions and any derivatives thereof.
48 # END BPS TAGGED BLOCK }}}
56 print STDERR "usage: $0 db_name[:server_name] db_user db_password\n";
60 # pretty correct support of charsets has been introduced in mysql 4.1
61 # as RT doesn't use it may result in issues:
62 # 1) data corruptions when default charset of mysql server has data restrictions like utf8
63 # 2) wrong ordering (collations)
65 # we have to define correct types for all columns. RT uses UTF-8, ascii and binary.
66 # * ascii is subset of many mysql's charsets except may be one or two rare where some ascii
67 # characters replaced with local
68 # * for many charsets mysql allows us to store any octets sequences even when those are
69 # invalid for this particula set, for example we can store UTF-8 data in latin1
70 # column and fetch it as UTF-8, however sorting will be wrong
72 # here is tricky algorithm to change column to desired charset:
73 # * text to binary convertion is pretty straight forward except that text types
74 # have length definitions in terms of characters and in some cases we must
75 # use longer binary types to satisfy space requirements
76 # * binary to text is much easier as we know that there is ascii or UTF-8 then
77 # we just make convertion, also 32 chars are long enough to store 32 bytes, so
78 # length changes is not required
79 # * text to text convertion is trickier. no matter what is the current character set
80 # of the column we know that there is either ascii or UTF-8, so we can not use
81 # direct convertion, instead we do text to binary plus binary to text convertion
83 # * as well we add charset definition for all tables and for the DB as well,
84 # so all new columns by default will be in UTF-8 charset
96 ObjectCustomFieldValues
115 RightName => 'ascii',
116 ObjectType => 'ascii',
117 PrincipalType => 'ascii',
120 MessageId => 'ascii',
123 ContentType => 'ascii',
124 ContentEncoding => 'ascii',
130 Description => 'utf8',
132 ContentType => 'ascii',
133 ObjectType => 'ascii',
139 Description => 'utf8',
140 LookupType => 'ascii',
142 CustomFieldValues => {
144 Description => 'utf8',
153 Description => 'utf8',
156 ObjectType => 'ascii',
160 Description => 'utf8',
161 ObjectType => 'ascii',
165 Description => 'utf8',
174 ObjectCustomFieldValues => {
175 ObjectType => 'ascii',
177 LargeContent => 'binary',
178 ContentType => 'ascii',
179 ContentEncoding => 'ascii',
182 PrincipalType => 'ascii',
186 Description => 'utf8',
187 CorrespondAddress => 'utf8',
188 CommentAddress => 'utf8',
192 Description => 'utf8',
193 ExecModule => 'ascii',
194 Argument => 'binary',
198 Description => 'utf8',
199 ExecModule => 'ascii',
200 Argument => 'binary',
201 ApplicableTransTypes => 'ascii',
204 Description => 'utf8',
205 ConditionRules => 'utf8',
206 ActionRules => 'utf8',
207 CustomIsApplicableCode => 'utf8',
208 CustomPrepareCode => 'utf8',
209 CustomCommitCode => 'utf8',
213 id => 'binary', # ascii?
214 a_session => 'binary',
218 Description => 'utf8',
229 ObjectType => 'ascii',
234 ReferenceType => 'ascii',
239 Password => 'binary',
242 EmailAddress => 'utf8',
243 FreeformContactInfo => 'utf8',
244 Organization => 'utf8',
248 EmailEncoding => 'ascii',
249 WebEncoding => 'ascii',
250 ExternalContactInfoId => 'utf8',
251 ContactInfoSystem => 'utf8',
252 ExternalAuthId => 'utf8',
253 AuthSystem => 'utf8',
257 MobilePhone => 'utf8',
258 PagerPhone => 'utf8',
270 my %max_type_length = (
273 tinytext => int 1<<8,
274 mediumtext => int 1<<16,
276 longtext => int 1<<32,
281 my ($db_datasource, $db_user, $db_pass) = (shift, shift, shift);
282 my $dbh = DBI->connect("dbi:mysql:$db_datasource", $db_user, $db_pass, { RaiseError => 1 });
283 my $db_name = $db_datasource;
284 $db_name =~ s/:.*$//;
286 my $version = ($dbh->selectrow_array("show variables like 'version'"))[1];
287 ($version) = $version =~ /^(\d+\.\d+)/;
289 push @sql_commands, qq{ALTER DATABASE `$db_name` DEFAULT CHARACTER SET utf8};
290 convert_table($_) foreach @tables;
292 print join "\n", map(/;$/? $_ : "$_;", @sql_commands), "";
293 my $use_p = $db_pass ? " -p" : '';
294 print STDERR <<ENDREMINDER;
295 -- ** NOTICE: No database changes have been made. **
296 -- Please review the generated SQL, ensure you have a full backup of your database
297 -- and apply it to your database using a command like:
298 -- mysql -u ${db_user}${use_p} $db_name < queries.sql";
302 my %alter_aggregator;
305 @alter_aggregator{'char_to_binary','binary_to_char'} = (['DEFAULT CHARACTER SET utf8'],[]);
307 my $sth = $dbh->column_info( undef, $db_name, $table, undef );
309 my $columns = $sth->fetchall_arrayref({});
310 return unless @$columns;
311 foreach my $info (@$columns) {
312 convert_column(%$info);
314 for my $conversiontype (qw(char_to_binary binary_to_char)) {
315 next unless @{$alter_aggregator{$conversiontype}};
316 push @sql_commands, qq{ALTER TABLE $table\n }.
317 join(",\n ",@{$alter_aggregator{$conversiontype}});
323 my $table = $info{'TABLE_NAME'};
324 my $column = $info{'COLUMN_NAME'};
325 my $type = $info{'TYPE_NAME'};
326 return unless $type =~ /(CHAR|TEXT|BLOB|BINARY)$/i;
328 my $required_charset = $charset{$table}{$column};
329 unless ( $required_charset ) {
330 print STDERR join(".", @info{'TABLE_SCHEMA', 'TABLE_NAME', 'COLUMN_NAME'})
331 ." has type $type however mapping is missing.\n";
335 my $collation = column_info($table, $column)->{'collation'};
336 # mysql 4.1 returns literal NULL instead of undef
337 my $current_charset = $collation && $collation ne 'NULL'? (split /_/, $collation)[0]: 'binary';
338 return if $required_charset eq $current_charset;
340 if ( $required_charset eq 'binary' ) {
341 char_to_binary(%info);
343 elsif ( $current_charset eq 'binary' ) {
344 binary_to_char( $required_charset, %info);
346 char_to_char( $required_charset, %info);
353 my $table = $info{'TABLE_NAME'};
354 my $column = $info{'COLUMN_NAME'};
355 my $new_type = calc_suitable_binary_type(%info);
356 push @{$alter_aggregator{char_to_binary}},
357 "MODIFY $column $new_type ".build_column_definition(%info);
362 my ($charset, %info) = @_;
364 my $table = $info{'TABLE_NAME'};
365 my $column = $info{'COLUMN_NAME'};
366 my $new_type = lc $info{'TYPE_NAME'};
367 if ( $new_type =~ /binary/ ) {
368 $new_type =~ s/binary/char/;
369 $new_type .= '('. $info{'COLUMN_SIZE'} .')';
371 $new_type =~ s/blob/text/;
374 push @{$alter_aggregator{binary_to_char}},
375 "MODIFY $column ". uc($new_type) ." CHARACTER SET ". $charset
376 ." ". build_column_definition(%info);
380 my ($charset, %info) = @_;
382 my $table = $info{'TABLE_NAME'};
383 my $column = $info{'COLUMN_NAME'};
384 my $new_type = $info{'mysql_type_name'};
386 char_to_binary(%info);
387 push @{$alter_aggregator{binary_to_char}},
388 "MODIFY $column ". uc($new_type)." CHARACTER SET ". $charset
389 ." ". build_column_definition(%info);
392 sub calc_suitable_binary_type {
394 my $type = lc $info{'TYPE_NAME'};
395 return 'LONGBLOB' if $type eq 'longtext';
397 my $current_max_byte_length = column_byte_length(@info{qw(TABLE_NAME COLUMN_NAME)}) || 0;
398 if ( $max_type_length{ $type } > $current_max_byte_length ) {
399 if ( $type eq 'varchar' || $type eq 'char' ) {
400 my $new_type = $type;
401 $new_type =~ s/char/binary/;
402 $new_type .= $info{'COLUMN_SIZE'} >= $current_max_byte_length
403 ? '('. $info{'COLUMN_SIZE'} .')'
404 : '('. $current_max_byte_length .')';
407 my $new_type = $type;
408 $new_type =~ s/text/blob/;
413 foreach ( sort { $max_type_length{$a} <=> $max_type_length{$b} } keys %max_type_length ) {
414 next if $max_type_length{ $_ } <= $current_max_byte_length;
416 $new_type = $_; last;
418 $new_type =~ s/text/blob/;
423 sub build_column_definition {
427 $res .= 'NOT ' unless $info{'NULLABLE'};
429 my $default = column_info(@info{qw(TABLE_NAME COLUMN_NAME)})->{default};
430 if ( defined $default ) {
431 $res .= ' DEFAULT '. $dbh->quote($default);
432 } elsif ( $info{'NULLABLE'} ) {
433 $res .= ' DEFAULT NULL';
435 $res .= ' AUTO_INCREMENT' if $info{'mysql_is_auto_increment'};
439 sub column_byte_length {
440 my ($table, $column) = @_;
441 if ( $version >= 5.0 ) {
442 # information_schema searches can be case sensitive
443 # and users may use lower_case_table_names, use LOWER
444 # for everything just in case
445 # http://dev.mysql.com/doc/refman/5.1/en/charset-collation-information-schema.html
446 my ($char, $octet) = @{ $dbh->selectrow_arrayref(
447 "SELECT CHARACTER_MAXIMUM_LENGTH, CHARACTER_OCTET_LENGTH FROM information_schema.COLUMNS WHERE"
448 ." LOWER(TABLE_SCHEMA) = ". lc( $dbh->quote($db_name) )
449 ." AND LOWER(TABLE_NAME) = ". lc( $dbh->quote($table) )
450 ." AND LOWER(COLUMN_NAME) = ". lc( $dbh->quote($column) )
452 return $octet if $octet == $char;
454 return $dbh->selectrow_arrayref("SELECT MAX(LENGTH(". $dbh->quote_identifier($column) .")) FROM $table")->[0];
458 my ($table, $column) = @_;
459 # XXX: DBD::mysql doesn't provide this info, may be will do in 4.0007 if I'll write a patch
460 local $dbh->{FetchHashKeyName} = 'NAME_lc';
461 return $dbh->selectrow_hashref("SHOW FULL COLUMNS FROM $table LIKE " . $dbh->quote($column));