#!/usr/bin/env perl
# BEGIN BPS TAGGED BLOCK {{{
#
# COPYRIGHT:
#
# This software is Copyright (c) 1996-2016 Best Practical Solutions, LLC
#
#
# (Except where explicitly superseded by other copyright notices)
#
#
# LICENSE:
#
# This work is made available to you under the terms of Version 2 of
# the GNU General Public License. A copy of that license should have
# been provided with this software, but in any event can be snarfed
# from www.gnu.org.
#
# This work is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 or visit their web page on the internet at
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
#
#
# CONTRIBUTION SUBMISSION POLICY:
#
# (The following paragraph is not intended to limit the rights granted
# to you to modify and distribute this software under the terms of
# the GNU General Public License and is only of importance to you if
# you choose to contribute your changes and enhancements to the
# community by submitting them to Best Practical Solutions, LLC.)
#
# By intentionally submitting any modifications, corrections or
# derivatives to this work, or any other work intended for use with
# Request Tracker, to Best Practical Solutions, LLC, you confirm that
# you are the copyright holder for those contributions and you grant
# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
# royalty-free, perpetual, license to use, copy, create derivative
# works based on those contributions, and sublicense and distribute
# those contributions and any derivatives thereof.
#
# END BPS TAGGED BLOCK }}}
# Portions Copyright 2002 Autrijus Tang

use strict;
use warnings;

use open qw/ :std :encoding(UTF-8) /;

use File::Find;
use File::Copy;
use Regexp::Common;
use Carp;
use Locale::PO;

$| = 1;

# With no arguments, update every catalog found on disk.
# po dir is for extensions
# NOTE(review): the four glob patterns were missing from this statement
# (it read "(, , , )"); they are reconstructed from the share/po handling
# in the dictionary loop below -- confirm against the project layout.
@ARGV = map { glob($_) } qw(share/po/*.po share/po/*.pot po/*.po po/*.pot)
    unless @ARGV;

# While scanning: msgid text => list of [ filename, line, vars, interp ].
# After the normalization loop below: quoted msgid => Locale::PO object.
our %FILECAT;

# extract all strings and stuff them into %FILECAT
# scan html dir for extensions
File::Find::find(
    { wanted => \&extract_strings_from_code, follow => 1 },
    qw(bin sbin lib share html etc)
);

# ensure proper escaping and [_1] => %1 transformation
foreach my $str ( sort keys %FILECAT ) {
    my $entry = delete $FILECAT{$str};
    next unless @{$entry};

    my ($filename, $line) = @{ $entry->[0] };
    my $location = "$filename line $line" . (@{$entry} > 1 ? " (and ".(@{$entry}-1)." other places)" : "");

    if ($str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m) {
        warn "Extraneous whitespace in '$str' at $location\n";
    }

    # Only occurrences that came from a double-quoted literal (slot 3 set)
    # can actually interpolate, so only then is a sigil suspicious.
    if (grep {$_->[3]} @{$entry} and $str =~ /([\$\@]\w+)/) {
        warn "Interpolated variable '$1' in '$str' at $location\n";
    }

    # NOTE(review): the next two substitutions were fused into a single
    # broken statement; they are reconstructed as the "[_1] => %1" and
    # "[func,args] => %func(args)" rewrites, each honouring "~" as the
    # bracket escape character -- confirm against the upstream tool.
    my $escape = sub { $_ = shift; s/\b_(\d+)/%$1/; $_ };
    $str =~ s/((?<!~)(?:~~)*)\[_(\d+)\]/$1%$2/g;
    $str =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".$escape->($3).")"/eg;
    $str =~ s/~([\[\]])/$1/g;

    my $po = Locale::PO->new(-msgid => $str, -msgstr => "");
    $po->reference( join ( ' ', sort map $_->[0].":".$_->[1], @{ $entry } ) );

    # Record each distinct argument list seen with this string as an
    # automatic comment, so translators can see what fills the placeholders.
    my %seen;
    my @vars;
    foreach my $find ( sort { $a->[2] cmp $b->[2] } grep { $_->[2] } @{ $entry } ) {
        my ( $file, $line, $var ) = @{$find};
        $var =~ s/^\s*,\s*//;
        $var =~ s/\s*$//;
        push @vars, "($var)" unless $seen{$var}++;
    }
    $po->automatic( join( "\n", @vars) );

    $FILECAT{$po->msgid} = $po;
}

# update all language dictionaries
foreach my $dict (@ARGV) {
    $dict = "share/po/$dict.pot" if ( $dict eq 'rt' );
    $dict = "share/po/$dict.po" unless -f $dict or $dict =~ m!/!;

    my $lang = $dict;
    $lang =~ s|.*/||;
    $lang =~ s|\.po$||;
    $lang =~ s|\.pot$||;

    update($lang, $dict);
}

# File::Find "wanted" callback: scan one file for localizable strings.
#
# Reads the file named by $_ (File::Find has already changed into its
# directory) and appends [ filename, line, vars, interp ] records to
# @{ $FILECAT{$string} } for every string found via:
#   - Mason <&|/l> / <&|/l_unsafe> filters
#   - loc("...") calls
#   - trailing "# loc", "# loc()", "# loc_qw", "# loc_left_pair",
#     "# loc_pair" and "# loc{key}" comment marks
# Progress and diagnostics are printed to STDOUT.  Returns nothing useful.
sub extract_strings_from_code {
    my $file = $_;

    local $/;    # slurp mode: the whole file is matched as one string

    return if ( -d $_ || !-e _ );
    return
        if ( $File::Find::dir =~ qr!lib/blib|lib/t/autogen|var|m4|local|share/fonts! );
    return if ( /\.(?:pot|po|bak|gif|png|psd|jpe?g|svg|css|js)$/ );
    return if ( /~|,D|,B$|extract-message-catalog$|tweak-template-locstring$/ );
    return if ( /StyleGuide.pod/ );
    return if ( /^[\.#]/ );
    return if ( -f "$_.in" );    # the ".in" template is scanned instead

    print "Looking at $File::Find::name";
    my $filename = $File::Find::name;
    $filename =~ s'^\./'';
    $filename =~ s'\.in$'';

    # A lexical handle avoids reusing "_", which is also the stat-cache
    # handle consulted by the -e test above.
    my $fh;
    unless (open $fh, '<', $file) {
        print "\n Cannot open $file for reading ($!), skipping.\n\n";
        return;
    }

    my $errors = 0;

    my $re_space_wo_nl = qr{(?!\n)\s};
    my $re_loc_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc $re_space_wo_nl* $}mx;
    my $re_loc_qw_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_qw $re_space_wo_nl* $}mx;
    my $re_loc_paren_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc \(\) $re_space_wo_nl* $}mx;
    my $re_loc_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_pair $re_space_wo_nl* $}mx;
    my $re_loc_left_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_left_pair $re_space_wo_nl* $}mx;

    # NOTE(review): the $9 / $10 captures used below assume this pattern
    # contributes seven capture groups of its own under -keep; verify if
    # Regexp::Common is upgraded.
    my $re_delim = $RE{delimited}{-delim=>q{'"}}{-keep};

    $_ = <$fh>;
    close($fh);

    # Mason filter: <&|/l>...</&> and <&|/l_unsafe>...</&>
    # NOTE(review): the closing "</&>" was missing from this pattern, which
    # left the string capture permanently empty; restored here.
    my $line = 1;
    while (m!\G(.*?<&\|/l(?:_unsafe)?(.*?)&>(.*?)</&>)!sg) {
        my ( $all, $vars, $str ) = ( $1, $2, $3 );
        $vars =~ s/[\n\r]//g;
        $line += ( $all =~ tr/\n/\n/ );
        $str =~ s/\\(['"\\])/$1/g;
        push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
    }

    # Localization function: loc(...)
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?\bloc$RE{balanced}{-parens=>'()'}{-keep})/sg) {
        my ( $all, $match ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );

        my ( $vars, $str );
        next unless ( $match =~ /\(\s*($re_delim)(.*?)\s*\)$/so );

        my $interp = (substr($1,0,1) eq '"' ? 1 : 0);
        $str = substr( $1, 1, -1 );    # $str comes before $vars now
        $vars = $9;

        $vars =~ s/[\n\r]//g;
        $str =~ s/\\(['"\\])/$1/g;

        push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ];
    }

    # Lines on which some comment mark was recognized; consulted by the
    # final "ones we missed" pass.
    my %seen;

    # Comment-based mark: "..." # loc
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?($re_delim)[ \{\}\)\],;]*$re_loc_suffix)/smgo) {
        my ( $all, $str ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $str ) {
            print "\n" unless $errors++;
            print " Couldn't process loc at $filename:$line:\n $str\n";
            next;
        }
        my $interp = (substr($str,0,1) eq '"' ? 1 : 0);
        $str = substr($str, 1, -1);
        $str =~ s/\\(['"\\])/$1/g;
        push @{ $FILECAT{$str} }, [ $filename, $line, '', $interp ];
    }

    # Comment-based mark for list to loc(): ("...", $foo, $bar) # loc()
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*? $RE{balanced}{-parens=>'()'}{-keep} [ \{\}\)\],;]* $re_loc_paren_suffix)/sgx) {
        my ( $all, $match ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );

        my ( $vars, $str );
        unless ( $match =~ /\(\s*($re_delim)(.*?)\s*\)$/so ) {
            print "\n" unless $errors++;
            print " Failed to match delimited against $match, line $line";
            next;
        }

        my $interp = (substr($1,0,1) eq '"' ? 1 : 0);
        $str = substr( $1, 1, -1 );    # $str comes before $vars now
        $vars = $9;
        $seen{$line}++;

        $vars =~ s/[\n\r]//g;
        $str =~ s/\\(['"\\])/$1/g;

        push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ];
    }

    # Comment-based qw mark: "qw(...)" # loc_qw
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:qw\(([^)]+)\)[ \{\}\)\],;]*)?$re_loc_qw_suffix)/smgo) {
        my ( $all, $str ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $str ) {
            print "\n" unless $errors++;
            print " Couldn't process loc_qw at $filename:$line:\n $str\n";
            next;
        }
        foreach my $value (split ' ', $str) {
            push @{ $FILECAT{$value} }, [ $filename, $line, '' ];
        }
    }

    # Comment-based left pair mark: "..." => ... # loc_left_pair
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:(\w+|$re_delim)\s*=>[^#\n]+?)?$re_loc_left_pair_suffix)/smgo) {
        my ( $all, $key ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $key ) {
            print "\n" unless $errors++;
            print " Couldn't process loc_left_pair at $filename:$line:\n $key\n";
            next;
        }
        my $interp = (substr($key,0,1) eq '"' ? 1 : 0);
        $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string
        push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp ];
    }

    # Comment-based pair mark: "..." => "..." # loc_pair
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:(\w+|$re_delim)\s*=>\s*($re_delim)[ \{\}\)\],;]*)?$re_loc_pair_suffix)/smgo) {
        my ( $all, $key, $val ) = ( $1, $2, $10 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $key && defined $val ) {
            print "\n" unless $errors++;
            print " Couldn't process loc_pair at $filename:$line:\n $key\n $val\n";
            next;
        }
        my $interp_key = (substr($key,0,1) eq '"' ? 1 : 0);
        $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string
        push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp_key ];

        my $interp_val = (substr($val,0,1) eq '"' ? 1 : 0);
        $val = substr($val, 1, -1);    # dequote always quoted string
        $val =~ s/\\(['"\\])/$1/g;
        push @{ $FILECAT{$val} }, [ $filename, $line, '', $interp_val ];
    }

    # Specific key foo => "...", #loc{foo}
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(\w+|$re_delim)\s*=>\s*($re_delim)(?-s:.*?)\#$re_space_wo_nl*loc\{\2\}$re_space_wo_nl*)$/smgo) {
        my ( $all, $key, $val ) = ( $1, $2, $10 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $key && defined $val ) {
            warn "Couldn't process loc_pair at $filename:$line:\n $key\n $val\n";
            next;
        }
        $val = substr($val, 1, -1);    # dequote always quoted string
        $val =~ s/\\(['"])/$1/g;
        push @{ $FILECAT{$val} }, [ $filename, $line, '' ];
    }

    # Check for ones we missed: any loc-style comment on a line that none
    # of the passes above marked as seen.
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*? \# $re_space_wo_nl* (loc (_\w+|\(\)|\{$re_delim\})?) $re_space_wo_nl* $)/smgox) {
        my ($all, $loc_type) = ($1, $2);
        $line += ( $all =~ tr/\n/\n/ );
        next if $seen{$line};
        print "\n" unless $errors++;
        print " $loc_type that did not match, line $line of $filename\n";
    }

    if ($errors) {
        print "\n";
    }
    else {
        # Nothing to report: wipe the "Looking at ..." progress line.
        print "\r", " " x 100, "\r";
    }
}

# Return the arguments with duplicates removed, keeping first-seen order.
sub uniq {
    my %seen;
    return grep { !$seen{$_}++ } @_;
}

# Merge the extracted strings in %FILECAT into one PO/POT file.
#
#   $lang - language tag derived from the file name; used for messages and
#           to detect English catalogs
#   $file - path of the .po/.pot file to rewrite
#
# Entries no longer present in the source are dropped, references and
# automatic comments are refreshed from %FILECAT, placeholder mismatches
# are printed, and the file is saved sorted by msgid.  Returns 1 on
# success and nothing if the file is not writable; dies if saving fails.
sub update {
    my $lang = shift;
    my $file = shift;

    unless (!-e $file or -w $file) {
        warn "Can't write to $lang, skipping...\n";
        return;
    }

    my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ );

    print "Updating $lang";
    my $lexicon = Locale::PO->load_file_ashash( $file, "utf-8" );

    # Default to the empty string for new ones
    $lexicon->{$_->msgid} ||= $_ for values %FILECAT;

    my $errors = 0;
    for my $msgid ( keys %{$lexicon} ) {
        my $entry = $lexicon->{$msgid};

        # Don't output empty translations for english
        if (not length $entry->dequote($entry->msgstr) and $is_english) {
            delete $lexicon->{$msgid};
            next;
        }

        # The PO properties at the top are always fine to leave as-is
        next if not length $entry->dequote($msgid);

        # Not found in source?  Drop it
        my $source = $FILECAT{$msgid};
        if (not $source) {
            delete $lexicon->{$msgid};
            next;
        }

        # Pull in the properties from the source
        $entry->reference( $source->reference );
        $entry->automatic( $source->automatic );

        my $fail = validate_msgstr($lang, map {$entry->dequote($_)} $entry->msgid, $entry->msgstr);
        next unless $fail;
        print "\n" unless $errors++;
        print $fail;
    }

    # Write entries ordered by their dequoted msgid.
    my @order = map {$_->[0]}
                sort {$a->[1] cmp $b->[1]}
                map {[$_, $_->dequote($_->msgid)]}
                values %{$lexicon};

    Locale::PO->save_file_fromarray($file, \@order, "utf-8")
        or die "Couldn't update '$file': $!";

    if ($errors) {
        print "\n";
    }
    else {
        print "\r", " "x100, "\r";
    }
    return 1;
}

# Check that a translation uses the same %N placeholders as its msgid.
#
#   $lang   - language tag (accepted but not used here)
#   $msgid  - dequoted source string
#   $msgstr - dequoted translation
#
# Returns nothing when the translation is empty or the placeholder sets
# agree; otherwise returns a two-line description of the mismatch.
sub validate_msgstr {
    my $lang = shift;
    my $msgid = shift;
    my $msgstr = shift;

    return if not defined $msgstr or $msgstr eq ''; # no translation for this string

    # we uniq because a string can use a placeholder more than once
    # (eg %1 %quant(%1, ...) like in our czech localization
    my @expected_variables = uniq($msgid =~ /%\d+/g);
    my @got_variables = uniq($msgstr =~ /%\d+/g);

    # this catches the case where expected uses %1,%2 and got uses %1,%3
    # unlike a simple @expected_variables == @got_variables
    my $expected = join ", ", sort @expected_variables;
    my $got = join ", ", sort @got_variables;
    return if $expected eq $got;

    return " expected (" . $expected . ") in msgid: $msgid\n" .
           " got (" . $got . ") in msgstr: $msgstr\n";
}