10 print STDERR "usage: $0 db_name[:server_name] db_user db_password\n";
14 # pretty correct support of charsets has been introduced in mysql 4.1
15 # as RT doesn't use it may result in issues:
16 # 1) data corruptions when default charset of mysql server has data restrictions like utf8
17 # 2) wrong ordering (collations)
19 # we have to define correct types for all columns. RT uses UTF-8, ascii and binary.
20 # * ascii is subset of many mysql's charsets except may be one or two rare where some ascii
21 # characters replaced with local
22 # * for many charsets mysql allows us to store any octets sequences even when those are
23 # invalid for this particula set, for example we can store UTF-8 data in latin1
24 # column and fetch it as UTF-8, however sorting will be wrong
26 # here is tricky algorithm to change column to desired charset:
27 # * text to binary convertion is pretty straight forward except that text types
28 # have length definitions in terms of characters and in some cases we must
29 # use longer binary types to satisfy space requirements
30 # * binary to text is much easier as we know that there is ascii or UTF-8 then
31 # we just make convertion, also 32 chars are long enough to store 32 bytes, so
32 # length changes is not required
33 # * text to text convertion is trickier. no matter what is the current character set
34 # of the column we know that there is either ascii or UTF-8, so we can not use
35 # direct convertion, instead we do text to binary plus binary to text convertion
37 # * as well we add charset definition for all tables and for the DB as well,
38 # so all new columns by default will be in UTF-8 charset
50 ObjectCustomFieldValues
66 ObjectType => 'ascii',
67 PrincipalType => 'ascii',
73 ContentType => 'ascii',
74 ContentEncoding => 'ascii',
80 Description => 'utf8',
82 ContentType => 'ascii',
83 ObjectType => 'ascii',
89 Description => 'utf8',
90 LookupType => 'ascii',
92 CustomFieldValues => {
94 Description => 'utf8',
98 Description => 'utf8',
107 ObjectCustomFieldValues => {
108 ObjectType => 'ascii',
110 LargeContent => 'binary',
111 ContentType => 'ascii',
112 ContentEncoding => 'ascii',
115 PrincipalType => 'ascii',
119 Description => 'utf8',
120 CorrespondAddress => 'ascii',
121 CommentAddress => 'ascii',
125 Description => 'utf8',
126 ExecModule => 'ascii',
127 Argument => 'binary',
131 Description => 'utf8',
132 ExecModule => 'ascii',
133 Argument => 'binary',
134 ApplicableTransTypes => 'ascii',
137 Description => 'utf8',
138 ConditionRules => 'utf8',
139 ActionRules => 'utf8',
140 CustomIsApplicableCode => 'utf8',
141 CustomPrepareCode => 'utf8',
142 CustomCommitCode => 'utf8',
146 id => 'binary', # ascii?
147 a_session => 'binary',
151 Description => 'utf8',
162 ObjectType => 'ascii',
167 ReferenceType => 'ascii',
172 Password => 'binary',
175 EmailAddress => 'ascii',
176 FreeformContactInfo => 'utf8',
177 Organization => 'utf8',
181 EmailEncoding => 'ascii',
182 WebEncoding => 'ascii',
183 ExternalContactInfoId => 'utf8',
184 ContactInfoSystem => 'utf8',
185 ExternalAuthId => 'utf8',
186 AuthSystem => 'utf8',
190 MobilePhone => 'utf8',
191 PagerPhone => 'utf8',
203 my %max_type_length = (
206 tinytext => int 1<<8,
207 mediumtext => int 1<<16,
209 longtext => int 1<<32,
214 my ($db_datasource, $db_user, $db_pass) = (shift, shift, shift);
215 my $dbh = DBI->connect("dbi:mysql:$db_datasource", $db_user, $db_pass, { RaiseError => 1 });
216 my $db_name = $db_datasource;
217 $db_name =~ s/:.*$//;
219 my $version = ($dbh->selectrow_array("show variables like 'version'"))[1];
220 ($version) = $version =~ /^(\d+\.\d+)/;
222 push @sql_commands, qq{ALTER DATABASE $db_name DEFAULT CHARACTER SET utf8};
223 convert_table($_) foreach @tables;
225 print join "\n", map(/;$/? $_ : "$_;", @sql_commands), "";
226 my $use_p = $db_pass ? " -p" : '';
227 print STDERR <<ENDREMINDER;
228 -- ** NOTICE: No database changes have been made. **
229 -- Please review the generated SQL, ensure you have a full backup of your database
230 -- and apply it to your database using a command like:
231 -- mysql -u ${db_user}${use_p} $db_name < queries.sql";
235 my %alter_aggregator;
238 @alter_aggregator{'char_to_binary','binary_to_char'} = (['DEFAULT CHARACTER SET utf8'],[]);
240 my $sth = $dbh->column_info( undef, $db_name, $table, undef );
242 while ( my $info = $sth->fetchrow_hashref ) {
243 convert_column(%$info);
245 for my $conversiontype (qw(char_to_binary binary_to_char)) {
246 next unless @{$alter_aggregator{$conversiontype}};
247 push @sql_commands, qq{ALTER TABLE $table\n }.
248 join(",\n ",@{$alter_aggregator{$conversiontype}});
254 my $table = $info{'TABLE_NAME'};
255 my $column = $info{'COLUMN_NAME'};
256 my $type = $info{'TYPE_NAME'};
257 return unless $type =~ /(CHAR|TEXT|BLOB|BINARY)$/i;
259 my $required_charset = $charset{$table}{$column};
260 unless ( $required_charset ) {
261 print STDERR join(".", @info{'TABLE_SCHEMA', 'TABLE_NAME', 'COLUMN_NAME'})
262 ." has type $type however mapping is missing.\n";
266 my $collation = column_info($table, $column)->{'collation'};
267 # mysql 4.1 returns literal NULL instead of undef
268 my $current_charset = $collation && $collation ne 'NULL'? (split /_/, $collation)[0]: 'binary';
269 return if $required_charset eq $current_charset;
271 if ( $required_charset eq 'binary' ) {
272 char_to_binary(%info);
274 elsif ( $current_charset eq 'binary' ) {
275 binary_to_char( $required_charset, %info);
277 char_to_char( $required_charset, %info);
284 my $table = $info{'TABLE_NAME'};
285 my $column = $info{'COLUMN_NAME'};
286 my $new_type = calc_suitable_binary_type(%info);
287 push @{$alter_aggregator{char_to_binary}},
288 "MODIFY $column $new_type ".build_column_definition(%info);
293 my ($charset, %info) = @_;
295 my $table = $info{'TABLE_NAME'};
296 my $column = $info{'COLUMN_NAME'};
297 my $new_type = lc $info{'TYPE_NAME'};
298 if ( $new_type =~ /binary/ ) {
299 $new_type =~ s/binary/char/;
300 $new_type .= '('. $info{'COLUMN_SIZE'} .')';
302 $new_type =~ s/blob/text/;
305 push @{$alter_aggregator{binary_to_char}},
306 "MODIFY $column ". uc($new_type) ." CHARACTER SET ". $charset
307 ." ". build_column_definition(%info);
311 my ($charset, %info) = @_;
313 my $table = $info{'TABLE_NAME'};
314 my $column = $info{'COLUMN_NAME'};
315 my $new_type = $info{'mysql_type_name'};
317 char_to_binary(%info);
318 push @{$alter_aggregator{binary_to_char}},
319 "MODIFY $column ". uc($new_type)." CHARACTER SET ". $charset
320 ." ". build_column_definition(%info);
323 sub calc_suitable_binary_type {
325 my $type = lc $info{'TYPE_NAME'};
326 return 'LONGBLOB' if $type eq 'longtext';
328 my $current_max_byte_length = column_byte_length(@info{qw(TABLE_NAME COLUMN_NAME)}) || 0;
329 if ( $max_type_length{ $type } > $current_max_byte_length ) {
330 if ( $type eq 'varchar' || $type eq 'char' ) {
331 my $new_type = $type;
332 $new_type =~ s/char/binary/;
333 $new_type .= $info{'COLUMN_SIZE'} >= $current_max_byte_length
334 ? '('. $info{'COLUMN_SIZE'} .')'
335 : '('. $current_max_byte_length .')';
338 my $new_type = $type;
339 $new_type =~ s/text/blob/;
344 foreach ( sort { $max_type_length{$a} <=> $max_type_length{$b} } keys %max_type_length ) {
345 next if $max_type_length{ $_ } <= $current_max_byte_length;
347 $new_type = $_; last;
349 $new_type =~ s/text/blob/;
354 sub build_column_definition {
358 $res .= 'NOT ' unless $info{'NULLABLE'};
360 my $default = column_info(@info{qw(TABLE_NAME COLUMN_NAME)})->{default};
361 if ( defined $default ) {
362 $res .= ' DEFAULT '. $dbh->quote($default);
363 } elsif ( $info{'NULLABLE'} ) {
364 $res .= ' DEFAULT NULL';
366 $res .= ' AUTO_INCREMENT' if $info{'mysql_is_auto_increment'};
370 sub column_byte_length {
371 my ($table, $column) = @_;
372 if ( $version >= 5.0 ) {
373 my ($char, $octet) = @{ $dbh->selectrow_arrayref(
374 "SELECT CHARACTER_MAXIMUM_LENGTH, CHARACTER_OCTET_LENGTH FROM information_schema.COLUMNS WHERE"
375 ." TABLE_SCHEMA = ". $dbh->quote($db_name)
376 ." AND TABLE_NAME = ". $dbh->quote($table)
377 ." AND COLUMN_NAME = ". $dbh->quote($column)
379 return $octet if $octet == $char;
381 return $dbh->selectrow_arrayref("SELECT MAX(LENGTH(". $dbh->quote_identifier($column) .")) FROM $table")->[0];
385 my ($table, $column) = @_;
386 # XXX: DBD::mysql doesn't provide this info, may be will do in 4.0007 if I'll write a patch
387 local $dbh->{FetchHashKeyName} = 'NAME_lc';
388 return $dbh->selectrow_hashref("SHOW FULL COLUMNS FROM $table LIKE " . $dbh->quote($column));