X-Git-Url: http://git.freeside.biz/gitweb/?p=freeside.git;a=blobdiff_plain;f=rt%2Fdevel%2Ftools%2Fextract-message-catalog;h=aba2d8c05a06b2e23ad06da4831f4e23e7f18f02;hp=1533cfa611301ad92023839f7012e6da65b5b547;hb=7322f2afedcc2f427e997d1535a503613a83f088;hpb=85e677b86fc37c54e6de2b06340351a28f5a5916 diff --git a/rt/devel/tools/extract-message-catalog b/rt/devel/tools/extract-message-catalog index 1533cfa61..aba2d8c05 100644 --- a/rt/devel/tools/extract-message-catalog +++ b/rt/devel/tools/extract-message-catalog @@ -3,7 +3,7 @@ # # COPYRIGHT: # -# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC +# This software is Copyright (c) 1996-2016 Best Practical Solutions, LLC # # # (Except where explicitly superseded by other copyright notices) @@ -51,10 +51,14 @@ use strict; use warnings; +use open qw/ :std :encoding(UTF-8) /; + use File::Find; use File::Copy; use Regexp::Common; use Carp; +use Locale::PO; +$| = 1; # po dir is for extensions @ARGV = (, , , ) unless @ARGV; @@ -67,17 +71,37 @@ File::Find::find( { wanted => \&extract_strings_from_code, follow => 1 }, qw(bin # ensure proper escaping and [_1] => %1 transformation foreach my $str ( sort keys %FILECAT ) { - my $entry = $FILECAT{$str}; - my $oldstr = $str; + my $entry = delete $FILECAT{$str}; + next unless @{$entry}; + + my ($filename, $line) = @{ $entry->[0] }; + my $location = "$filename line $line" . (@{$entry} > 1 ? " (and ".(@{$entry}-1)." other places)" : ""); + + if ($str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m) { + warn "Extraneous whitespace in '$str' at $location\n"; + } + if (grep {$_->[3]} @{$entry} and $str =~ /([\$\@]\w+)/) { + warn "Interpolated variable '$1' in '$str' at $location\n"; + } - $str =~ s/\\/\\\\/g; - $str =~ s/\"/\\"/g; + my $escape = sub { $_ = shift; s/\b_(\d+)/%$1/; $_ }; $str =~ s/((?($3).")"/eg; $str =~ s/~([\[\]])/$1/g; - delete $FILECAT{$oldstr}; - $FILECAT{$str} = $entry; + my $po = Locale::PO->new(-msgid => $str, -msgstr => ""); + $po->reference( join ( ' ', sort map $_->[0].":".$_->[1], @{ $entry } ) ); + my %seen; + my @vars; + foreach my $find ( sort { $a->[2] cmp $b->[2] } grep { $_->[2] } @{ $entry } ) { + my ( $file, $line, $var ) = @{$find}; + $var =~ s/^\s*,\s*//; + $var =~ s/\s*$//; + push @vars, "($var)" unless $seen{$var}++; + } + $po->automatic( join( "\n", @vars) ); + + $FILECAT{$po->msgid} = $po; } # update all language dictionaries @@ -93,28 +117,6 @@ foreach my $dict (@ARGV) { update($lang, $dict); } -# warn about various red flags in loc strings -foreach my $str ( sort keys %FILECAT ) { - my $entry = $FILECAT{$str}; - my $entry_count = @$entry; - - # doesn't exist in the current codebase, ignore for now - next if $entry_count == 0; - - my ($filename, $line) = @{ $entry->[0] }; - - my $location = "$filename line $line" . ($entry_count > 1 ? " (and ".($entry_count-1)." other places)" : ""); - - if ($str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m) { - warn "Extraneous whitespace in '$str' at $location\n"; - } - - if ($str =~ /([\$\@]\w+)/) { - warn "Interpolated variable '$1' in '$str' at $location\n"; - } -} - - sub extract_strings_from_code { my $file = $_; @@ -129,33 +131,35 @@ sub extract_strings_from_code { return if ( /^[\.#]/ ); return if ( -f "$_.in" ); - print "Looking at $File::Find::name\n"; + print "Looking at $File::Find::name"; my $filename = $File::Find::name; $filename =~ s'^\./''; $filename =~ s'\.in$''; unless (open _, '<', $file) { - print "Cannot open $file for reading ($!), skipping.\n"; + print "\n Cannot open $file for reading ($!), skipping.\n\n"; return; } + my $errors = 0; + my $re_space_wo_nl = qr{(?!\n)\s}; my $re_loc_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc $re_space_wo_nl* $}mx; my $re_loc_qw_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_qw $re_space_wo_nl* $}mx; + my $re_loc_paren_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc \(\) $re_space_wo_nl* $}mx; my $re_loc_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_pair $re_space_wo_nl* $}mx; my $re_loc_left_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_left_pair $re_space_wo_nl* $}mx; my $re_delim = $RE{delimited}{-delim=>q{'"}}{-keep}; $_ = <_>; - # Mason filter: <&|/l>... + # Mason filter: <&|/l>... and <&|/l_unsafe>... my $line = 1; - while (m!\G(.*?<&\|/l(.*?)&>(.*?))!sg) { + while (m!\G(.*?<&\|/l(?:_unsafe)?(.*?)&>(.*?))!sg) { my ( $all, $vars, $str ) = ( $1, $2, $3 ); $vars =~ s/[\n\r]//g; $line += ( $all =~ tr/\n/\n/ ); - $str =~ s/\\'/\'/g; - #print "STR IS $str\n"; + $str =~ s/\\(['"\\])/$1/g; push @{ $FILECAT{$str} }, [ $filename, $line, $vars ]; } @@ -167,45 +171,73 @@ sub extract_strings_from_code { $line += ( $all =~ tr/\n/\n/ ); my ( $vars, $str ); - if ( $match =~ - /\(\s*($re_delim)(.*?)\s*\)$/so ) { + next unless ( $match =~ /\(\s*($re_delim)(.*?)\s*\)$/so ); - $str = substr( $1, 1, -1 ); # $str comes before $vars now - $vars = $9; - } - else { - next; - } + my $interp = (substr($1,0,1) eq '"' ? 1 : 0); + $str = substr( $1, 1, -1 ); # $str comes before $vars now + $vars = $9; $vars =~ s/[\n\r]//g; - $str =~ s/\\'/\'/g; + $str =~ s/\\(['"\\])/$1/g; - push @{ $FILECAT{$str} }, [ $filename, $line, $vars ]; + push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ]; } + my %seen; # Comment-based mark: "..." # loc $line = 1; pos($_) = 0; - while (m/\G(.*?($re_delim)[\}\)\],;]*$re_loc_suffix)/smgo) { + while (m/\G(.*?($re_delim)[ \{\}\)\],;]*$re_loc_suffix)/smgo) { my ( $all, $str ) = ( $1, $2 ); $line += ( $all =~ tr/\n/\n/ ); + $seen{$line}++; unless ( defined $str ) { - warn "Couldn't process loc at $filename:$line"; + print "\n" unless $errors++; + print " Couldn't process loc at $filename:$line:\n $str\n"; next; } + my $interp = (substr($str,0,1) eq '"' ? 1 : 0); $str = substr($str, 1, -1); - $str =~ s/\\'/\'/g; - push @{ $FILECAT{$str} }, [ $filename, $line, '' ]; + $str =~ s/\\(['"\\])/$1/g; + push @{ $FILECAT{$str} }, [ $filename, $line, '', $interp ]; + } + + # Comment-based mark for list to loc(): ("...", $foo, $bar) # loc() + $line = 1; + pos($_) = 0; + while (m/\G(.*? $RE{balanced}{-parens=>'()'}{-keep} [ \{\}\)\],;]* $re_loc_paren_suffix)/sgx) { + my ( $all, $match ) = ( $1, $2 ); + $line += ( $all =~ tr/\n/\n/ ); + + my ( $vars, $str ); + unless ( $match =~ + /\(\s*($re_delim)(.*?)\s*\)$/so ) { + print "\n" unless $errors++; + print " Failed to match delimited against $match, line $line"; + next; + } + + my $interp = (substr($1,0,1) eq '"' ? 1 : 0); + $str = substr( $1, 1, -1 ); # $str comes before $vars now + $vars = $9; + $seen{$line}++; + + $vars =~ s/[\n\r]//g; + $str =~ s/\\(['"\\])/$1/g; + + push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ]; } # Comment-based qw mark: "qw(...)" # loc_qw $line = 1; pos($_) = 0; - while (m/\G(.*?(?:qw\(([^)]+)\)[\}\)\],;]*)?$re_loc_qw_suffix)/smgo) { + while (m/\G(.*?(?:qw\(([^)]+)\)[ \{\}\)\],;]*)?$re_loc_qw_suffix)/smgo) { my ( $all, $str ) = ( $1, $2 ); $line += ( $all =~ tr/\n/\n/ ); + $seen{$line}++; unless ( defined $str ) { - warn "Couldn't process loc_qw at $filename:$line"; + print "\n" unless $errors++; + print " Couldn't process loc_qw at $filename:$line:\n $str\n"; next; } foreach my $value (split ' ', $str) { @@ -219,167 +251,164 @@ sub extract_strings_from_code { while (m/\G(.*?(?:(\w+|$re_delim)\s*=>[^#\n]+?)?$re_loc_left_pair_suffix)/smgo) { my ( $all, $key ) = ( $1, $2 ); $line += ( $all =~ tr/\n/\n/ ); + $seen{$line}++; unless ( defined $key ) { - warn "Couldn't process loc_left_pair at $filename:$line"; + print "\n" unless $errors++; + print " Couldn't process loc_left_pair at $filename:$line:\n $key\n"; next; } - $key =~ s/\\'/\'/g; - push @{ $FILECAT{$key} }, [ $filename, $line, '' ]; + my $interp = (substr($key,0,1) eq '"' ? 1 : 0); + $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string + push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp ]; } # Comment-based pair mark: "..." => "..." # loc_pair $line = 1; pos($_) = 0; - while (m/\G(.*?(?:(\w+)\s*=>\s*($re_delim)[\}\)\],;]*)?$re_loc_pair_suffix)/smgo) { - my ( $all, $key, $val ) = ( $1, $2, $3 ); + while (m/\G(.*?(?:(\w+|$re_delim)\s*=>\s*($re_delim)[ \{\}\)\],;]*)?$re_loc_pair_suffix)/smgo) { + my ( $all, $key, $val ) = ( $1, $2, $10 ); + $line += ( $all =~ tr/\n/\n/ ); + $seen{$line}++; + unless ( defined $key && defined $val ) { + print "\n" unless $errors++; + print " Couldn't process loc_pair at $filename:$line:\n $key\n $val\n"; + next; + } + my $interp_key = (substr($key,0,1) eq '"' ? 1 : 0); + $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string + push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp_key ]; + + my $interp_val = (substr($val,0,1) eq '"' ? 1 : 0); + $val = substr($val, 1, -1); # dequote always quoted string + $val =~ s/\\(['"\\])/$1/g; + push @{ $FILECAT{$val} }, [ $filename, $line, '', $interp_val ]; + } + + # Specific key foo => "...", #loc{foo} + $line = 1; + pos($_) = 0; + while (m/\G(.*?(\w+|$re_delim)\s*=>\s*($re_delim)(?-s:.*?)\#$re_space_wo_nl*loc\{\2\}$re_space_wo_nl*)$/smgo) { + my ( $all, $key, $val ) = ( $1, $2, $10 ); $line += ( $all =~ tr/\n/\n/ ); + $seen{$line}++; unless ( defined $key && defined $val ) { - warn "Couldn't process loc_pair at $filename:$line"; + warn "Couldn't process loc_pair at $filename:$line:\n $key\n $val\n"; next; } - $val = substr($val, 1, -1); - $key =~ s/\\'/\'/g; - $val =~ s/\\'/\'/g; - push @{ $FILECAT{$key} }, [ $filename, $line, '' ]; - push @{ $FILECAT{$val} }, [ $filename, $line, '' ]; + $val = substr($val, 1, -1); # dequote always quoted string + $val =~ s/\\(['"])/$1/g; + push @{ $FILECAT{$val} }, [ $filename, $line, '' ]; + } + + # Check for ones we missed + $line = 1; + pos($_) = 0; + while (m/\G(.*? \# $re_space_wo_nl* (loc (_\w+|\(\)|{$re_delim})?) $re_space_wo_nl* $)/smgox) { + my ($all, $loc_type) = ($1, $2); + $line += ( $all =~ tr/\n/\n/ ); + next if $seen{$line}; + print "\n" unless $errors++; + print " $loc_type that did not match, line $line of $filename\n"; + } + + if ($errors) { + print "\n" + } else { + print "\r", " " x 100, "\r"; } close (_); } +sub uniq { + my %seen; + return grep { !$seen{$_}++ } @_; +} + sub update { my $lang = shift; my $file = shift; - my ( %Lexicon, %Header); - my $out = ''; unless (!-e $file or -w $file) { - warn "Can't write to $lang, skipping...\n"; - return; + warn "Can't write to $lang, skipping...\n"; + return; } - print "Updating $lang...\n"; - - my @lines; - @lines = () if open LEXICON, '<', $file; - @lines = grep { !/^(#(:|\.)\s*|$)/ } @lines; - while (@lines) { - my $msghdr = ""; - $msghdr .= shift @lines while ( $lines[0] && $lines[0] !~ /^(#~ )?msgid/ ); - - my $msgid = ""; - -# '#~ ' is the prefix of launchpad for msg that's not found the the source -# we'll remove the prefix later so we can still show them with our own mark - - $msgid .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgid|")/ ); - my $msgstr = ""; - $msgstr .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgstr|")/ ); - - last unless $msgid; + my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ ); - chomp $msgid; - chomp $msgstr; + print "Updating $lang"; + my $lexicon = Locale::PO->load_file_ashash( $file, "utf-8" ); - $msgid =~ s/^#~ //mg; - $msgstr =~ s/^#~ //mg; + # Default to the empty string for new ones + $lexicon->{$_->msgid} ||= $_ + for values %FILECAT; - $msgid =~ s/^msgid "(.*)"\s*?$/$1/m or warn "$msgid in $file"; + my $errors = 0; + for my $msgid ( keys %{$lexicon} ) { + my $entry = $lexicon->{$msgid}; - if ( $msgid eq '' ) { - # null msgid, msgstr will have head info - $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/ms or warn "$msgstr in $file"; - } - else { - $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/m or warn "$msgstr in $file"; + # Don't output empty translations for english + if (not length $entry->dequote($entry->msgstr) and $is_english) { + delete $lexicon->{$msgid}; + next; } - if ( $msgid ne '' ) { - for my $msg ( \$msgid, \$msgstr ) { - if ( $$msg =~ /\n/ ) { - my @lines = split /\n/, $$msg; - $$msg = - shift @lines; # first line don't need to handle any more - for (@lines) { - if (/^"(.*)"\s*$/) { - $$msg .= $1; - } - } - } - - # convert \\n back to \n - $$msg =~ s/(?!\\)\\n/\n/g; - } - } + # The PO properties at the top are always fine to leave as-is + next if not length $entry->dequote($msgid); - $Lexicon{$msgid} = $msgstr; - $Header{$msgid} = $msghdr; - } - - my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ ); - - foreach my $str ( keys %FILECAT ) { - $Lexicon{$str} ||= ''; - } - foreach ( sort keys %Lexicon ) { - my $f = join ( ' ', sort map $_->[0].":".$_->[1], @{ $FILECAT{$_} } ); - my $nospace = $_; - $nospace =~ s/ +$//; - - if ( !$Lexicon{$_} and $Lexicon{$nospace} ) { - $Lexicon{$_} = - $Lexicon{$nospace} . ( ' ' x ( length($_) - length($nospace) ) ); + # Not found in source? Drop it + my $source = $FILECAT{$msgid}; + if (not $source) { + delete $lexicon->{$msgid}; + next; } - next if !length( $Lexicon{$_} ) and $is_english; - - my %seen; - $out .= $Header{$_} if exists $Header{$_}; + # Pull in the properties from the source + $entry->reference( $source->reference ); + $entry->automatic( $source->automatic ); + my $fail = validate_msgstr($lang, + map {$entry->dequote($_)} + $entry->msgid, $entry->msgstr); + next unless $fail; + print "\n" unless $errors++; + print $fail; + } + my @order = map {$_->[0]} + sort {$a->[1] cmp $b->[1]} + map {[$_, $_->dequote($_->msgid)]} + values %{$lexicon}; - next if (!$f && $_ && !$Lexicon{$_}); - if ( $f && $f !~ /^\s+$/ ) { + Locale::PO->save_file_fromarray($file, \@order, "utf-8") + or die "Couldn't update '$file': $!"; - $out .= "#: $f\n"; - } - elsif ($_) { - $out .= "#: NOT FOUND IN SOURCE\n"; - } - foreach my $entry ( sort { $a->[2] cmp $b->[2] } grep { $_->[2] } @{ $FILECAT{$_} } ) { - my ( $file, $line, $var ) = @{$entry}; - $var =~ s/^\s*,\s*//; - $var =~ s/\s*$//; - $out .= "#. ($var)\n" unless $seen{$var}++; - } - $out .= 'msgid ' . fmt($_) . "msgstr \"$Lexicon{$_}\"\n\n"; + if ($errors) { + print "\n"; + } else { + print "\r", " "x100, "\r"; } - - open PO, '>', $file or die "Couldn't open '$file' for writing: $!"; - print PO $out; - close PO; - return 1; } -sub escape { - my $text = shift; - $text =~ s/\b_(\d+)/%$1/; - return $text; -} +sub validate_msgstr { + my $lang = shift; + my $msgid = shift; + my $msgstr = shift; -sub fmt { - my $str = shift; - return "\"$str\"\n" unless $str =~ /\n/; + return if not defined $msgstr or $msgstr eq ''; # no translation for this string - my $multi_line = ($str =~ /\n(?!\z)/); - $str =~ s/\n/\\n"\n"/g; + # we uniq because a string can use a placeholder more than once + # (eg %1 %quant(%1, ...) like in our czech localization + my @expected_variables = uniq($msgid =~ /%\d+/g); + my @got_variables = uniq($msgstr =~ /%\d+/g); - if ($str =~ /\n"$/) { - chop $str; - } - else { - $str .= "\"\n"; - } - return $multi_line ? qq(""\n"$str) : qq("$str); + # this catches the case where expected uses %1,%2 and got uses %1,%3 + # unlike a simple @expected_variables == @got_variables + my $expected = join ", ", sort @expected_variables; + my $got = join ", ", sort @got_variables; + return if $expected eq $got; + + return " expected (" . $expected . ") in msgid: $msgid\n" . + " got (" . $got . ") in msgstr: $msgstr\n"; }