summaryrefslogtreecommitdiff
path: root/rt/devel/tools/extract-message-catalog
diff options
context:
space:
mode:
authorIvan Kohler <ivan@freeside.biz>2015-07-09 22:18:55 -0700
committerIvan Kohler <ivan@freeside.biz>2015-07-09 22:18:55 -0700
commit1c538bfabc2cd31f27067505f0c3d1a46cba6ef0 (patch)
tree96922ad4459eda1e649327fd391d60c58d454c53 /rt/devel/tools/extract-message-catalog
parent4f5619288413a185e9933088d9dd8c5afbc55dfa (diff)
RT 4.2.11, ticket#13852
Diffstat (limited to 'rt/devel/tools/extract-message-catalog')
-rw-r--r--rt/devel/tools/extract-message-catalog365
1 files changed, 197 insertions, 168 deletions
diff --git a/rt/devel/tools/extract-message-catalog b/rt/devel/tools/extract-message-catalog
index cba84d2..5dd89b8 100644
--- a/rt/devel/tools/extract-message-catalog
+++ b/rt/devel/tools/extract-message-catalog
@@ -51,10 +51,14 @@
use strict;
use warnings;
+use open qw/ :std :encoding(UTF-8) /;
+
use File::Find;
use File::Copy;
use Regexp::Common;
use Carp;
+use Locale::PO;
+$| = 1;
# po dir is for extensions
@ARGV = (<share/po/*.po>, <share/po/*.pot>, <po/*.po>, <po/*.pot>) unless @ARGV;
@@ -67,17 +71,37 @@ File::Find::find( { wanted => \&extract_strings_from_code, follow => 1 }, qw(bin
# ensure proper escaping and [_1] => %1 transformation
foreach my $str ( sort keys %FILECAT ) {
- my $entry = $FILECAT{$str};
- my $oldstr = $str;
+ my $entry = delete $FILECAT{$str};
+ next unless @{$entry};
+
+ my ($filename, $line) = @{ $entry->[0] };
+ my $location = "$filename line $line" . (@{$entry} > 1 ? " (and ".(@{$entry}-1)." other places)" : "");
+
+ if ($str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m) {
+ warn "Extraneous whitespace in '$str' at $location\n";
+ }
+ if (grep {$_->[3]} @{$entry} and $str =~ /([\$\@]\w+)/) {
+ warn "Interpolated variable '$1' in '$str' at $location\n";
+ }
- $str =~ s/\\/\\\\/g;
- $str =~ s/\"/\\"/g;
+ my $escape = sub { $_ = shift; s/\b_(\d+)/%$1/; $_ };
$str =~ s/((?<!~)(?:~~)*)\[_(\d+)\]/$1%$2/g;
- $str =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".escape($3).")"/eg;
+ $str =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".$escape->($3).")"/eg;
$str =~ s/~([\[\]])/$1/g;
- delete $FILECAT{$oldstr};
- $FILECAT{$str} = $entry;
+ my $po = Locale::PO->new(-msgid => $str, -msgstr => "");
+ $po->reference( join ( ' ', sort map $_->[0].":".$_->[1], @{ $entry } ) );
+ my %seen;
+ my @vars;
+ foreach my $find ( sort { $a->[2] cmp $b->[2] } grep { $_->[2] } @{ $entry } ) {
+ my ( $file, $line, $var ) = @{$find};
+ $var =~ s/^\s*,\s*//;
+ $var =~ s/\s*$//;
+ push @vars, "($var)" unless $seen{$var}++;
+ }
+ $po->automatic( join( "\n", @vars) );
+
+ $FILECAT{$po->msgid} = $po;
}
# update all language dictionaries
@@ -93,28 +117,6 @@ foreach my $dict (@ARGV) {
update($lang, $dict);
}
-# warn about various red flags in loc strings
-foreach my $str ( sort keys %FILECAT ) {
- my $entry = $FILECAT{$str};
- my $entry_count = @$entry;
-
- # doesn't exist in the current codebase, ignore for now
- next if $entry_count == 0;
-
- my ($filename, $line) = @{ $entry->[0] };
-
- my $location = "$filename line $line" . ($entry_count > 1 ? " (and ".($entry_count-1)." other places)" : "");
-
- if ($str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m) {
- warn "Extraneous whitespace in '$str' at $location\n";
- }
-
- if ($str =~ /([\$\@]\w+)/) {
- warn "Interpolated variable '$1' in '$str' at $location\n";
- }
-}
-
-
sub extract_strings_from_code {
my $file = $_;
@@ -129,19 +131,22 @@ sub extract_strings_from_code {
return if ( /^[\.#]/ );
return if ( -f "$_.in" );
- print "Looking at $File::Find::name\n";
+ print "Looking at $File::Find::name";
my $filename = $File::Find::name;
$filename =~ s'^\./'';
$filename =~ s'\.in$'';
unless (open _, '<', $file) {
- print "Cannot open $file for reading ($!), skipping.\n";
+ print "\n Cannot open $file for reading ($!), skipping.\n\n";
return;
}
+ my $errors = 0;
+
my $re_space_wo_nl = qr{(?!\n)\s};
my $re_loc_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc $re_space_wo_nl* $}mx;
my $re_loc_qw_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_qw $re_space_wo_nl* $}mx;
+ my $re_loc_paren_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc \(\) $re_space_wo_nl* $}mx;
my $re_loc_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_pair $re_space_wo_nl* $}mx;
my $re_loc_left_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_left_pair $re_space_wo_nl* $}mx;
my $re_delim = $RE{delimited}{-delim=>q{'"}}{-keep};
@@ -154,8 +159,7 @@ sub extract_strings_from_code {
my ( $all, $vars, $str ) = ( $1, $2, $3 );
$vars =~ s/[\n\r]//g;
$line += ( $all =~ tr/\n/\n/ );
- $str =~ s/\\'/\'/g;
- #print "STR IS $str\n";
+ $str =~ s/\\(['"\\])/$1/g;
push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
}
@@ -167,45 +171,73 @@ sub extract_strings_from_code {
$line += ( $all =~ tr/\n/\n/ );
my ( $vars, $str );
- if ( $match =~
- /\(\s*($re_delim)(.*?)\s*\)$/so ) {
+ next unless ( $match =~ /\(\s*($re_delim)(.*?)\s*\)$/so );
- $str = substr( $1, 1, -1 ); # $str comes before $vars now
- $vars = $9;
- }
- else {
- next;
- }
+ my $interp = (substr($1,0,1) eq '"' ? 1 : 0);
+ $str = substr( $1, 1, -1 ); # $str comes before $vars now
+ $vars = $9;
$vars =~ s/[\n\r]//g;
- $str =~ s/\\'/\'/g;
+ $str =~ s/\\(['"\\])/$1/g;
- push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
+ push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ];
}
+ my %seen;
# Comment-based mark: "..." # loc
$line = 1;
pos($_) = 0;
- while (m/\G(.*?($re_delim)[\}\)\],;]*$re_loc_suffix)/smgo) {
+ while (m/\G(.*?($re_delim)[ \{\}\)\],;]*$re_loc_suffix)/smgo) {
my ( $all, $str ) = ( $1, $2 );
$line += ( $all =~ tr/\n/\n/ );
+ $seen{$line}++;
unless ( defined $str ) {
- warn "Couldn't process loc at $filename:$line";
+ print "\n" unless $errors++;
+ print " Couldn't process loc at $filename:$line:\n $str\n";
next;
}
+ my $interp = (substr($str,0,1) eq '"' ? 1 : 0);
$str = substr($str, 1, -1);
- $str =~ s/\\'/\'/g;
- push @{ $FILECAT{$str} }, [ $filename, $line, '' ];
+ $str =~ s/\\(['"\\])/$1/g;
+ push @{ $FILECAT{$str} }, [ $filename, $line, '', $interp ];
+ }
+
+ # Comment-based mark for list to loc(): ("...", $foo, $bar) # loc()
+ $line = 1;
+ pos($_) = 0;
+ while (m/\G(.*? $RE{balanced}{-parens=>'()'}{-keep} [ \{\}\)\],;]* $re_loc_paren_suffix)/sgx) {
+ my ( $all, $match ) = ( $1, $2 );
+ $line += ( $all =~ tr/\n/\n/ );
+
+ my ( $vars, $str );
+ unless ( $match =~
+ /\(\s*($re_delim)(.*?)\s*\)$/so ) {
+ print "\n" unless $errors++;
+ print " Failed to match delimited against $match, line $line";
+ next;
+ }
+
+ my $interp = (substr($1,0,1) eq '"' ? 1 : 0);
+ $str = substr( $1, 1, -1 ); # $str comes before $vars now
+ $vars = $9;
+ $seen{$line}++;
+
+ $vars =~ s/[\n\r]//g;
+ $str =~ s/\\(['"\\])/$1/g;
+
+ push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ];
}
# Comment-based qw mark: "qw(...)" # loc_qw
$line = 1;
pos($_) = 0;
- while (m/\G(.*?(?:qw\(([^)]+)\)\s*[\{\}\)\],; ]*)?$re_loc_qw_suffix)/smgo) {
+ while (m/\G(.*?(?:qw\(([^)]+)\)[ \{\}\)\],;]*)?$re_loc_qw_suffix)/smgo) {
my ( $all, $str ) = ( $1, $2 );
$line += ( $all =~ tr/\n/\n/ );
+ $seen{$line}++;
unless ( defined $str ) {
- warn "Couldn't process loc_qw at $filename:$line";
+ print "\n" unless $errors++;
+ print " Couldn't process loc_qw at $filename:$line:\n $str\n";
next;
}
foreach my $value (split ' ', $str) {
@@ -219,167 +251,164 @@ sub extract_strings_from_code {
while (m/\G(.*?(?:(\w+|$re_delim)\s*=>[^#\n]+?)?$re_loc_left_pair_suffix)/smgo) {
my ( $all, $key ) = ( $1, $2 );
$line += ( $all =~ tr/\n/\n/ );
+ $seen{$line}++;
unless ( defined $key ) {
- warn "Couldn't process loc_left_pair at $filename:$line";
+ print "\n" unless $errors++;
+ print " Couldn't process loc_left_pair at $filename:$line:\n $key\n";
next;
}
- $key =~ s/\\'/\'/g;
- push @{ $FILECAT{$key} }, [ $filename, $line, '' ];
+ my $interp = (substr($key,0,1) eq '"' ? 1 : 0);
+ $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string
+ push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp ];
}
# Comment-based pair mark: "..." => "..." # loc_pair
$line = 1;
pos($_) = 0;
- while (m/\G(.*?(?:(\w+)\s*=>\s*($re_delim)[\}\)\],;]*)?$re_loc_pair_suffix)/smgo) {
- my ( $all, $key, $val ) = ( $1, $2, $3 );
+ while (m/\G(.*?(?:(\w+|$re_delim)\s*=>\s*($re_delim)[ \{\}\)\],;]*)?$re_loc_pair_suffix)/smgo) {
+ my ( $all, $key, $val ) = ( $1, $2, $10 );
+ $line += ( $all =~ tr/\n/\n/ );
+ $seen{$line}++;
+ unless ( defined $key && defined $val ) {
+ print "\n" unless $errors++;
+ print " Couldn't process loc_pair at $filename:$line:\n $key\n $val\n";
+ next;
+ }
+ my $interp_key = (substr($key,0,1) eq '"' ? 1 : 0);
+ $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string
+ push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp_key ];
+
+ my $interp_val = (substr($val,0,1) eq '"' ? 1 : 0);
+ $val = substr($val, 1, -1); # dequote always quoted string
+ $val =~ s/\\(['"\\])/$1/g;
+ push @{ $FILECAT{$val} }, [ $filename, $line, '', $interp_val ];
+ }
+
+ # Specific key foo => "...", #loc{foo}
+ $line = 1;
+ pos($_) = 0;
+ while (m/\G(.*?(\w+|$re_delim)\s*=>\s*($re_delim)(?-s:.*?)\#$re_space_wo_nl*loc\{\2\}$re_space_wo_nl*)$/smgo) {
+ my ( $all, $key, $val ) = ( $1, $2, $10 );
$line += ( $all =~ tr/\n/\n/ );
+ $seen{$line}++;
unless ( defined $key && defined $val ) {
- warn "Couldn't process loc_pair at $filename:$line";
+ warn "Couldn't process loc_pair at $filename:$line:\n $key\n $val\n";
next;
}
- $val = substr($val, 1, -1);
- $key =~ s/\\'/\'/g;
- $val =~ s/\\'/\'/g;
- push @{ $FILECAT{$key} }, [ $filename, $line, '' ];
- push @{ $FILECAT{$val} }, [ $filename, $line, '' ];
+ $val = substr($val, 1, -1); # dequote always quoted string
+ $val =~ s/\\(['"])/$1/g;
+ push @{ $FILECAT{$val} }, [ $filename, $line, '' ];
+ }
+
+ # Check for ones we missed
+ $line = 1;
+ pos($_) = 0;
+ while (m/\G(.*? \# $re_space_wo_nl* (loc (_\w+|\(\)|{$re_delim})?) $re_space_wo_nl* $)/smgox) {
+ my ($all, $loc_type) = ($1, $2);
+ $line += ( $all =~ tr/\n/\n/ );
+ next if $seen{$line};
+ print "\n" unless $errors++;
+ print " $loc_type that did not match, line $line of $filename\n";
+ }
+
+ if ($errors) {
+ print "\n"
+ } else {
+ print "\r", " " x 100, "\r";
}
close (_);
}
+sub uniq {
+ my %seen;
+ return grep { !$seen{$_}++ } @_;
+}
+
sub update {
my $lang = shift;
my $file = shift;
- my ( %Lexicon, %Header);
- my $out = '';
unless (!-e $file or -w $file) {
- warn "Can't write to $lang, skipping...\n";
- return;
+ warn "Can't write to $lang, skipping...\n";
+ return;
}
- print "Updating $lang...\n";
-
- my @lines;
- @lines = (<LEXICON>) if open LEXICON, '<', $file;
- @lines = grep { !/^(#(:|\.)\s*|$)/ } @lines;
- while (@lines) {
- my $msghdr = "";
- $msghdr .= shift @lines while ( $lines[0] && $lines[0] !~ /^(#~ )?msgid/ );
-
- my $msgid = "";
-
-# '#~ ' is the prefix of launchpad for msg that's not found the the source
-# we'll remove the prefix later so we can still show them with our own mark
-
- $msgid .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgid|")/ );
- my $msgstr = "";
- $msgstr .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgstr|")/ );
-
- last unless $msgid;
+ my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ );
- chomp $msgid;
- chomp $msgstr;
+ print "Updating $lang";
+ my $lexicon = Locale::PO->load_file_ashash( $file, "utf-8" );
- $msgid =~ s/^#~ //mg;
- $msgstr =~ s/^#~ //mg;
+ # Default to the empty string for new ones
+ $lexicon->{$_->msgid} ||= $_
+ for values %FILECAT;
- $msgid =~ s/^msgid "(.*)"\s*?$/$1/m or warn "$msgid in $file";
+ my $errors = 0;
+ for my $msgid ( keys %{$lexicon} ) {
+ my $entry = $lexicon->{$msgid};
- if ( $msgid eq '' ) {
- # null msgid, msgstr will have head info
- $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/ms or warn "$msgstr in $file";
- }
- else {
- $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/m or warn "$msgstr in $file";
+ # Don't output empty translations for english
+ if (not length $entry->dequote($entry->msgstr) and $is_english) {
+ delete $lexicon->{$msgid};
+ next;
}
- if ( $msgid ne '' ) {
- for my $msg ( \$msgid, \$msgstr ) {
- if ( $$msg =~ /\n/ ) {
- my @lines = split /\n/, $$msg;
- $$msg =
- shift @lines; # first line don't need to handle any more
- for (@lines) {
- if (/^"(.*)"\s*$/) {
- $$msg .= $1;
- }
- }
- }
-
- # convert \\n back to \n
- $$msg =~ s/(?!\\)\\n/\n/g;
- }
- }
+ # The PO properties at the top are always fine to leave as-is
+ next if not length $entry->dequote($msgid);
- $Lexicon{$msgid} = $msgstr;
- $Header{$msgid} = $msghdr;
- }
-
- my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ );
-
- foreach my $str ( keys %FILECAT ) {
- $Lexicon{$str} ||= '';
- }
- foreach ( sort keys %Lexicon ) {
- my $f = join ( ' ', sort map $_->[0].":".$_->[1], @{ $FILECAT{$_} } );
- my $nospace = $_;
- $nospace =~ s/ +$//;
-
- if ( !$Lexicon{$_} and $Lexicon{$nospace} ) {
- $Lexicon{$_} =
- $Lexicon{$nospace} . ( ' ' x ( length($_) - length($nospace) ) );
+ # Not found in source? Drop it
+ my $source = $FILECAT{$msgid};
+ if (not $source) {
+ delete $lexicon->{$msgid};
+ next;
}
- next if !length( $Lexicon{$_} ) and $is_english;
-
- my %seen;
- $out .= $Header{$_} if exists $Header{$_};
+ # Pull in the properties from the source
+ $entry->reference( $source->reference );
+ $entry->automatic( $source->automatic );
+ my $fail = validate_msgstr($lang,
+ map {$entry->dequote($_)}
+ $entry->msgid, $entry->msgstr);
+ next unless $fail;
+ print "\n" unless $errors++;
+ print $fail;
+ }
+ my @order = map {$_->[0]}
+ sort {$a->[1] cmp $b->[1]}
+ map {[$_, $_->dequote($_->msgid)]}
+ values %{$lexicon};
- next if (!$f && $_ && !$Lexicon{$_});
- if ( $f && $f !~ /^\s+$/ ) {
+ Locale::PO->save_file_fromarray($file, \@order, "utf-8")
+ or die "Couldn't update '$file': $!";
- $out .= "#: $f\n";
- }
- elsif ($_) {
- $out .= "#: NOT FOUND IN SOURCE\n";
- }
- foreach my $entry ( sort { $a->[2] cmp $b->[2] } grep { $_->[2] } @{ $FILECAT{$_} } ) {
- my ( $file, $line, $var ) = @{$entry};
- $var =~ s/^\s*,\s*//;
- $var =~ s/\s*$//;
- $out .= "#. ($var)\n" unless $seen{$var}++;
- }
- $out .= 'msgid ' . fmt($_) . "msgstr \"$Lexicon{$_}\"\n\n";
+ if ($errors) {
+ print "\n";
+ } else {
+ print "\r", " "x100, "\r";
}
-
- open PO, '>', $file or die "Couldn't open '$file' for writing: $!";
- print PO $out;
- close PO;
-
return 1;
}
-sub escape {
- my $text = shift;
- $text =~ s/\b_(\d+)/%$1/;
- return $text;
-}
+sub validate_msgstr {
+ my $lang = shift;
+ my $msgid = shift;
+ my $msgstr = shift;
-sub fmt {
- my $str = shift;
- return "\"$str\"\n" unless $str =~ /\n/;
+ return if not defined $msgstr or $msgstr eq ''; # no translation for this string
- my $multi_line = ($str =~ /\n(?!\z)/);
- $str =~ s/\n/\\n"\n"/g;
+ # we uniq because a string can use a placeholder more than once
+ # (eg %1 %quant(%1, ...) like in our czech localization
+ my @expected_variables = uniq($msgid =~ /%\d+/g);
+ my @got_variables = uniq($msgstr =~ /%\d+/g);
- if ($str =~ /\n"$/) {
- chop $str;
- }
- else {
- $str .= "\"\n";
- }
- return $multi_line ? qq(""\n"$str) : qq("$str);
+ # this catches the case where expected uses %1,%2 and got uses %1,%3
+ # unlike a simple @expected_variables == @got_variables
+ my $expected = join ", ", sort @expected_variables;
+ my $got = join ", ", sort @got_variables;
+ return if $expected eq $got;
+
+ return " expected (" . $expected . ") in msgid: $msgid\n" .
+ " got (" . $got . ") in msgstr: $msgstr\n";
}