#
# COPYRIGHT:
#
-# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC
+# This software is Copyright (c) 1996-2018 Best Practical Solutions, LLC
# <sales@bestpractical.com>
#
# (Except where explicitly superseded by other copyright notices)
use strict;
use warnings;
-use File::Find;
-use File::Copy;
-use Regexp::Common;
-use Carp;
+use open qw/ :std :encoding(UTF-8) /;
-# po dir is for extensions
-@ARGV = (<share/po/*.po>, <share/po/*.pot>, <po/*.po>, <po/*.pot>) unless @ARGV;
+use Locale::PO;
-our %FILECAT;
+use lib 'lib';
+use RT::I18N::Extract;
-# extract all strings and stuff them into %FILECAT
-# scan html dir for extensions
-File::Find::find( { wanted => \&extract_strings_from_code, follow => 1 }, qw(bin sbin lib share html etc) );
+$| = 1;
-# ensure proper escaping and [_1] => %1 transformation
-foreach my $str ( sort keys %FILECAT ) {
- my $entry = $FILECAT{$str};
- my $oldstr = $str;
+# po dir is for extensions
+@ARGV = (<share/po/*.po>, <share/po/*.pot>, <po/*.po>, <po/*.pot>) unless @ARGV;
- $str =~ s/\\/\\\\/g;
- $str =~ s/\"/\\"/g;
- $str =~ s/((?<!~)(?:~~)*)\[_(\d+)\]/$1%$2/g;
- $str =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".escape($3).")"/eg;
- $str =~ s/~([\[\]])/$1/g;
+# extract all strings and stuff them into %POT
+# scan html dir for extensions
+my $extract = RT::I18N::Extract->new;
+our %POT = $extract->all;
- delete $FILECAT{$oldstr};
- $FILECAT{$str} = $entry;
-}
+print "$_\n" for $extract->errors;
# update all language dictionaries
foreach my $dict (@ARGV) {
update($lang, $dict);
}
-# warn about various red flags in loc strings
-foreach my $str ( sort keys %FILECAT ) {
- my $entry = $FILECAT{$str};
- my $entry_count = @$entry;
-
- # doesn't exist in the current codebase, ignore for now
- next if $entry_count == 0;
-
- my ($filename, $line) = @{ $entry->[0] };
-
- my $location = "$filename line $line" . ($entry_count > 1 ? " (and ".($entry_count-1)." other places)" : "");
-
- if ($str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m) {
- warn "Extraneous whitespace in '$str' at $location\n";
- }
-
- if ($str =~ /([\$\@]\w+)/) {
- warn "Interpolated variable '$1' in '$str' at $location\n";
- }
-}
-
-
-sub extract_strings_from_code {
- my $file = $_;
-
- local $/;
- return if ( -d $_ || !-e _ );
- return
- if ( $File::Find::dir =~
- qr!lib/blib|lib/t/autogen|var|m4|local|share/fonts! );
- return if ( /\.(?:pot|po|bak|gif|png|psd|jpe?g|svg|css|js)$/ );
- return if ( /~|,D|,B$|extract-message-catalog$|tweak-template-locstring$/ );
- return if ( /StyleGuide.pod/ );
- return if ( /^[\.#]/ );
- return if ( -f "$_.in" );
-
- print "Looking at $File::Find::name\n";
- my $filename = $File::Find::name;
- $filename =~ s'^\./'';
- $filename =~ s'\.in$'';
-
- unless (open _, '<', $file) {
- print "Cannot open $file for reading ($!), skipping.\n";
- return;
- }
-
- my $re_space_wo_nl = qr{(?!\n)\s};
- my $re_loc_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc $re_space_wo_nl* $}mx;
- my $re_loc_qw_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_qw $re_space_wo_nl* $}mx;
- my $re_loc_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_pair $re_space_wo_nl* $}mx;
- my $re_loc_left_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_left_pair $re_space_wo_nl* $}mx;
- my $re_delim = $RE{delimited}{-delim=>q{'"}}{-keep};
-
- $_ = <_>;
-
- # Mason filter: <&|/l>...</&> and <&|/l_unsafe>...</&>
- my $line = 1;
- while (m!\G(.*?<&\|/l(?:_unsafe)?(.*?)&>(.*?)</&>)!sg) {
- my ( $all, $vars, $str ) = ( $1, $2, $3 );
- $vars =~ s/[\n\r]//g;
- $line += ( $all =~ tr/\n/\n/ );
- $str =~ s/\\'/\'/g;
- #print "STR IS $str\n";
- push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
- }
-
- # Localization function: loc(...)
- $line = 1;
- pos($_) = 0;
- while (m/\G(.*?\bloc$RE{balanced}{-parens=>'()'}{-keep})/sg) {
- my ( $all, $match ) = ( $1, $2 );
- $line += ( $all =~ tr/\n/\n/ );
-
- my ( $vars, $str );
- if ( $match =~
- /\(\s*($re_delim)(.*?)\s*\)$/so ) {
-
- $str = substr( $1, 1, -1 ); # $str comes before $vars now
- $vars = $9;
- }
- else {
- next;
- }
-
- $vars =~ s/[\n\r]//g;
- $str =~ s/\\'/\'/g;
-
- push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
- }
-
- # Comment-based mark: "..." # loc
- $line = 1;
- pos($_) = 0;
- while (m/\G(.*?($re_delim)[\}\)\],;]*$re_loc_suffix)/smgo) {
- my ( $all, $str ) = ( $1, $2 );
- $line += ( $all =~ tr/\n/\n/ );
- unless ( defined $str ) {
- warn "Couldn't process loc at $filename:$line";
- next;
- }
- $str = substr($str, 1, -1);
- $str =~ s/\\'/\'/g;
- push @{ $FILECAT{$str} }, [ $filename, $line, '' ];
- }
-
- # Comment-based qw mark: "qw(...)" # loc_qw
- $line = 1;
- pos($_) = 0;
- while (m/\G(.*?(?:qw\(([^)]+)\)\s*[\{\}\)\],; ]*)?$re_loc_qw_suffix)/smgo) {
- my ( $all, $str ) = ( $1, $2 );
- $line += ( $all =~ tr/\n/\n/ );
- unless ( defined $str ) {
- warn "Couldn't process loc_qw at $filename:$line";
- next;
- }
- foreach my $value (split ' ', $str) {
- push @{ $FILECAT{$value} }, [ $filename, $line, '' ];
- }
- }
-
- # Comment-based left pair mark: "..." => ... # loc_left_pair
- $line = 1;
- pos($_) = 0;
- while (m/\G(.*?(?:(\w+|$re_delim)\s*=>[^#\n]+?)?$re_loc_left_pair_suffix)/smgo) {
- my ( $all, $key ) = ( $1, $2 );
- $line += ( $all =~ tr/\n/\n/ );
- unless ( defined $key ) {
- warn "Couldn't process loc_left_pair at $filename:$line";
- next;
- }
- $key =~ s/\\'/\'/g;
- push @{ $FILECAT{$key} }, [ $filename, $line, '' ];
- }
- # Comment-based pair mark: "..." => "..." # loc_pair
- $line = 1;
- pos($_) = 0;
- while (m/\G(.*?(?:(\w+)\s*=>\s*($re_delim)[\}\)\],;]*)?$re_loc_pair_suffix)/smgo) {
- my ( $all, $key, $val ) = ( $1, $2, $3 );
- $line += ( $all =~ tr/\n/\n/ );
- unless ( defined $key && defined $val ) {
- warn "Couldn't process loc_pair at $filename:$line";
- next;
- }
- $val = substr($val, 1, -1);
- $key =~ s/\\'/\'/g;
- $val =~ s/\\'/\'/g;
- push @{ $FILECAT{$key} }, [ $filename, $line, '' ];
- push @{ $FILECAT{$val} }, [ $filename, $line, '' ];
- }
-
- close (_);
+sub uniq {
+ my %seen;
+ return grep { !$seen{$_}++ } @_;
}
sub update {
my $lang = shift;
my $file = shift;
- my ( %Lexicon, %Header);
- my $out = '';
unless (!-e $file or -w $file) {
- warn "Can't write to $lang, skipping...\n";
- return;
+ warn "Can't write to $lang, skipping...\n";
+ return;
}
- print "Updating $lang...\n";
-
- my @lines;
- @lines = (<LEXICON>) if open LEXICON, '<', $file;
- @lines = grep { !/^(#(:|\.)\s*|$)/ } @lines;
- while (@lines) {
- my $msghdr = "";
- $msghdr .= shift @lines while ( $lines[0] && $lines[0] !~ /^(#~ )?msgid/ );
-
- my $msgid = "";
-
-# '#~ ' is the prefix of launchpad for msg that's not found the the source
-# we'll remove the prefix later so we can still show them with our own mark
-
- $msgid .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgid|")/ );
- my $msgstr = "";
- $msgstr .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgstr|")/ );
-
- last unless $msgid;
-
- chomp $msgid;
- chomp $msgstr;
+ my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ );
- $msgid =~ s/^#~ //mg;
- $msgstr =~ s/^#~ //mg;
+ print "Updating $lang";
+ my $lexicon = Locale::PO->load_file_ashash( $file, "utf-8" );
- $msgid =~ s/^msgid "(.*)"\s*?$/$1/m or warn "$msgid in $file";
+ # Default to the empty string for new ones
+ $lexicon->{$_->msgid} ||= $_
+ for values %POT;
- if ( $msgid eq '' ) {
- # null msgid, msgstr will have head info
- $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/ms or warn "$msgstr in $file";
- }
- else {
- $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/m or warn "$msgstr in $file";
- }
+ my $errors = 0;
+ for my $msgid ( keys %{$lexicon} ) {
+ my $entry = $lexicon->{$msgid};
- if ( $msgid ne '' ) {
- for my $msg ( \$msgid, \$msgstr ) {
- if ( $$msg =~ /\n/ ) {
- my @lines = split /\n/, $$msg;
- $$msg =
- shift @lines; # first line don't need to handle any more
- for (@lines) {
- if (/^"(.*)"\s*$/) {
- $$msg .= $1;
- }
- }
- }
-
- # convert \\n back to \n
- $$msg =~ s/(?!\\)\\n/\n/g;
- }
+ # Don't output empty translations for english
+ if (not length $entry->dequote($entry->msgstr) and $is_english) {
+ delete $lexicon->{$msgid};
+ next;
}
- $Lexicon{$msgid} = $msgstr;
- $Header{$msgid} = $msghdr;
- }
-
- my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ );
+ # The PO properties at the top are always fine to leave as-is
+ next if not length $entry->dequote($msgid);
- foreach my $str ( keys %FILECAT ) {
- $Lexicon{$str} ||= '';
- }
- foreach ( sort keys %Lexicon ) {
- my $f = join ( ' ', sort map $_->[0].":".$_->[1], @{ $FILECAT{$_} } );
- my $nospace = $_;
- $nospace =~ s/ +$//;
-
- if ( !$Lexicon{$_} and $Lexicon{$nospace} ) {
- $Lexicon{$_} =
- $Lexicon{$nospace} . ( ' ' x ( length($_) - length($nospace) ) );
+ # Not found in source? Drop it
+ my $source = $POT{$msgid};
+ if (not $source) {
+ delete $lexicon->{$msgid};
+ next;
}
- next if !length( $Lexicon{$_} ) and $is_english;
-
- my %seen;
- $out .= $Header{$_} if exists $Header{$_};
+ # Pull in the properties from the source
+ $entry->reference( $source->reference );
+ $entry->automatic( $source->automatic );
+ my $fail = validate_msgstr($lang,
+ map {$entry->dequote($_)}
+ $entry->msgid, $entry->msgstr);
+ next unless $fail;
+ print "\n" unless $errors++;
+ print $fail;
+ }
+ my @order = map {$_->[0]}
+ sort {$a->[1] cmp $b->[1]}
+ map {[$_, $_->dequote($_->msgid)]}
+ values %{$lexicon};
- next if (!$f && $_ && !$Lexicon{$_});
- if ( $f && $f !~ /^\s+$/ ) {
+ Locale::PO->save_file_fromarray($file, \@order, "utf-8")
+ or die "Couldn't update '$file': $!";
- $out .= "#: $f\n";
- }
- elsif ($_) {
- $out .= "#: NOT FOUND IN SOURCE\n";
- }
- foreach my $entry ( sort { $a->[2] cmp $b->[2] } grep { $_->[2] } @{ $FILECAT{$_} } ) {
- my ( $file, $line, $var ) = @{$entry};
- $var =~ s/^\s*,\s*//;
- $var =~ s/\s*$//;
- $out .= "#. ($var)\n" unless $seen{$var}++;
- }
- $out .= 'msgid ' . fmt($_) . "msgstr \"$Lexicon{$_}\"\n\n";
+ if ($errors) {
+ print "\n";
+ } else {
+ print "\r", " "x100, "\r";
}
-
- open PO, '>', $file or die "Couldn't open '$file' for writing: $!";
- print PO $out;
- close PO;
-
return 1;
}
-sub escape {
- my $text = shift;
- $text =~ s/\b_(\d+)/%$1/;
- return $text;
-}
+sub validate_msgstr {
+ my $lang = shift;
+ my $msgid = shift;
+ my $msgstr = shift;
-sub fmt {
- my $str = shift;
- return "\"$str\"\n" unless $str =~ /\n/;
+ return if not defined $msgstr or $msgstr eq ''; # no translation for this string
- my $multi_line = ($str =~ /\n(?!\z)/);
- $str =~ s/\n/\\n"\n"/g;
+ # we uniq because a string can use a placeholder more than once
+ # (eg %1 %quant(%1, ...) like in our czech localization
+ my @expected_variables = uniq($msgid =~ /%\d+/g);
+ my @got_variables = uniq($msgstr =~ /%\d+/g);
- if ($str =~ /\n"$/) {
- chop $str;
- }
- else {
- $str .= "\"\n";
- }
- return $multi_line ? qq(""\n"$str) : qq("$str);
+ # this catches the case where expected uses %1,%2 and got uses %1,%3
+ # unlike a simple @expected_variables == @got_variables
+ my $expected = join ", ", sort @expected_variables;
+ my $got = join ", ", sort @got_variables;
+ return if $expected eq $got;
+
+ return " expected (" . $expected . ") in msgid: $msgid\n" .
+ " got (" . $got . ") in msgstr: $msgstr\n";
}