#!/usr/bin/env perl
# BEGIN BPS TAGGED BLOCK {{{
#
# COPYRIGHT:
#
# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC
#                                          <sales@bestpractical.com>
#
# (Except where explicitly superseded by other copyright notices)
#
#
# LICENSE:
#
# This work is made available to you under the terms of Version 2 of
# the GNU General Public License. A copy of that license should have
# been provided with this software, but in any event can be snarfed
# from www.gnu.org.
#
# This work is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 or visit their web page on the internet at
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
#
#
# CONTRIBUTION SUBMISSION POLICY:
#
# (The following paragraph is not intended to limit the rights granted
# to you to modify and distribute this software under the terms of
# the GNU General Public License and is only of importance to you if
# you choose to contribute your changes and enhancements to the
# community by submitting them to Best Practical Solutions, LLC.)
#
# By intentionally submitting any modifications, corrections or
# derivatives to this work, or any other work intended for use with
# Request Tracker, to Best Practical Solutions, LLC, you confirm that
# you are the copyright holder for those contributions and you grant
# Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
# royalty-free, perpetual, license to use, copy, create derivative
# works based on those contributions, and sublicense and distribute
# those contributions and any derivatives thereof.
#
# END BPS TAGGED BLOCK }}}
# Portions Copyright 2002 Autrijus Tang <autrijus@autrijus.org>

use strict;
use warnings;

use utf8;
use open qw/ :std :encoding(UTF-8) /;

use File::Find;
use File::Copy;
use Regexp::Common;
use Carp;

# po dir is for extensions
@ARGV = (<share/po/*.po>, <share/po/*.pot>, <po/*.po>, <po/*.pot>) unless @ARGV;

our %FILECAT;

# extract all strings and stuff them into %FILECAT
# scan html dir for extensions
File::Find::find( { wanted => \&extract_strings_from_code, follow => 1 }, qw(bin sbin lib share html etc) );

# ensure proper escaping and [_1] => %1 transformation
foreach my $str ( sort keys %FILECAT ) {
    my $entry = $FILECAT{$str};
    my $oldstr = $str;

    $str =~ s/\\/\\\\/g;
    $str =~ s/"/\\"/g;
    $str =~ s/((?<!~)(?:~~)*)\[_(\d+)\]/$1%$2/g;
    $str =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".escape($3).")"/eg;
    $str =~ s/~([\[\]])/$1/g;

    delete $FILECAT{$oldstr};
    $FILECAT{$str} = $entry;
}

# update all language dictionaries
foreach my $dict (@ARGV) {
    $dict = "share/po/$dict.pot" if ( $dict eq 'rt' );
    $dict = "share/po/$dict.po" unless -f $dict or $dict =~ m!/!;

    my $lang = $dict;
    $lang =~ s|.*/||;
    $lang =~ s|\.po$||;
    $lang =~ s|\.pot$||;

    update($lang, $dict);
}

# warn about various red flags in loc strings
foreach my $str ( sort keys %FILECAT ) {
    my $entry = $FILECAT{$str};
    my $entry_count = @$entry;

    # doesn't exist in the current codebase, ignore for now
    next if $entry_count == 0;

    my ($filename, $line) = @{ $entry->[0] };

    my $location = "$filename line $line" . ($entry_count > 1 ? " (and ".($entry_count-1)." other places)" : "");

    if ($str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m) {
        warn "Extraneous whitespace in '$str' at $location\n";
    }

    if (grep {$_->[3]} @{$entry} and $str =~ /([\$\@]\w+)/) {
        warn "Interpolated variable '$1' in '$str' at $location\n";
    }
}


sub extract_strings_from_code {
    my $file = $_;

    local $/;
    return if ( -d $_ || !-e _ );
    return
      if ( $File::Find::dir =~
        qr!lib/blib|lib/t/autogen|var|m4|local|share/fonts! );
    return if ( /\.(?:pot|po|bak|gif|png|psd|jpe?g|svg|css|js)$/ );
    return if ( /~|,D|,B$|extract-message-catalog$|tweak-template-locstring$/ );
    return if ( /StyleGuide.pod/ );
    return if ( /^[\.#]/ );
    return if ( -f "$_.in" );

    print "Looking at $File::Find::name\n";
    my $filename = $File::Find::name;
    $filename =~ s'^\./'';
    $filename =~ s'\.in$'';

    unless (open _, '<', $file) {
        print "Cannot open $file for reading ($!), skipping.\n";
        return;
    }

    my $re_space_wo_nl = qr{(?!\n)\s};
    my $re_loc_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc $re_space_wo_nl* $}mx;
    my $re_loc_qw_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_qw $re_space_wo_nl* $}mx;
    my $re_loc_paren_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc \(\) $re_space_wo_nl* $}mx;
    my $re_loc_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_pair $re_space_wo_nl* $}mx;
    my $re_loc_left_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_left_pair $re_space_wo_nl* $}mx;
    my $re_delim = $RE{delimited}{-delim=>q{'"}}{-keep};

    $_ = <_>;

    # Mason filter: <&|/l>...</&> and <&|/l_unsafe>...</&>
    my $line = 1;
    while (m!\G(.*?<&\|/l(?:_unsafe)?(.*?)&>(.*?)</&>)!sg) {
        my ( $all, $vars, $str ) = ( $1, $2, $3 );
        $vars =~ s/[\n\r]//g;
        $line += ( $all =~ tr/\n/\n/ );
        $str =~ s/\\(['"\\])/$1/g;
        #print "STR IS $str\n";
        push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
    }

    # Localization function: loc(...)
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?\bloc$RE{balanced}{-parens=>'()'}{-keep})/sg) {
        my ( $all, $match ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );

        my ( $vars, $str );
        next unless ( $match =~ /\(\s*($re_delim)(.*?)\s*\)$/so );

        my $interp = (substr($1,0,1) eq '"' ? 1 : 0);
        $str = substr( $1, 1, -1 );       # $str comes before $vars now
        $vars = $9;

        $vars =~ s/[\n\r]//g;
        $str  =~ s/\\(['"\\])/$1/g;

        push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ];
    }

    my %seen;
    # Comment-based mark: "..." # loc
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?($re_delim)[ \{\}\)\],;]*$re_loc_suffix)/smgo) {
        my ( $all, $str ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $str ) {
            warn "Couldn't process loc at $filename:$line:\n  str«$str»\n";
            next;
        }
        my $interp = (substr($str,0,1) eq '"' ? 1 : 0);
        $str = substr($str, 1, -1);
        $str =~ s/\\(['"\\])/$1/g;
        push @{ $FILECAT{$str} }, [ $filename, $line, '', $interp ];
    }

    # Comment-based mark for list to loc():  ("...", $foo, $bar)  # loc()
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*? $RE{balanced}{-parens=>'()'}{-keep} [ \{\}\)\],;]* $re_loc_paren_suffix)/sgx) {
        my ( $all, $match ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );

        my ( $vars, $str );
        unless ( $match =~
                /\(\s*($re_delim)(.*?)\s*\)$/so ) {
            warn "Failed to match delimited against $match, line $line";
            next;
        }

        my $interp = (substr($1,0,1) eq '"' ? 1 : 0);
        $str = substr( $1, 1, -1 );       # $str comes before $vars now
        $vars = $9;
        $seen{$line}++;

        $vars =~ s/[\n\r]//g;
        $str  =~ s/\\(['"\\])/$1/g;

        push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ];
    }

    # Comment-based qw mark: "qw(...)" # loc_qw
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:qw\(([^)]+)\)[ \{\}\)\],;]*)?$re_loc_qw_suffix)/smgo) {
        my ( $all, $str ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $str ) {
            warn "Couldn't process loc_qw at $filename:$line:\n  str«$str»\n";
            next;
        }
        foreach my $value (split ' ', $str) {
            push @{ $FILECAT{$value} }, [ $filename, $line, '' ];
        }
    }

    # Comment-based left pair mark: "..." => ... # loc_left_pair
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:(\w+|$re_delim)\s*=>[^#\n]+?)?$re_loc_left_pair_suffix)/smgo) {
        my ( $all, $key ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $key ) {
            warn "Couldn't process loc_left_pair at $filename:$line:\n  key«$key»\n";
            next;
        }
        my $interp = (substr($key,0,1) eq '"' ? 1 : 0);
        $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string
        push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp ];
    }

    # Comment-based pair mark: "..." => "..." # loc_pair
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:(\w+|$re_delim)\s*=>\s*($re_delim)[ \{\}\)\],;]*)?$re_loc_pair_suffix)/smgo) {
        my ( $all, $key, $val ) = ( $1, $2, $10 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $key && defined $val ) {
            warn "Couldn't process loc_pair at $filename:$line:\n  key«$key»\n  val«$val»\n";
            next;
        }
        my $interp_key = (substr($key,0,1) eq '"' ? 1 : 0);
        $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string
        push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp_key ];

        my $interp_val = (substr($val,0,1) eq '"' ? 1 : 0);
        $val = substr($val, 1, -1);    # dequote always quoted string
        $val  =~ s/\\(['"\\])/$1/g;
        push @{ $FILECAT{$val} }, [ $filename, $line, '', $interp_val ];
    }

    # Check for ones we missed
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*? \# $re_space_wo_nl* (loc (_\w+|\(\))?) $re_space_wo_nl* $)/smgox) {
        my ($all, $loc_type) = ($1, $2);
        $line += ( $all =~ tr/\n/\n/ );
        next if $seen{$line};
        warn "$loc_type that did not match, line $line of $filename\n";
    }

    close (_);
}

sub uniq {
    my %seen;
    return grep { !$seen{$_}++ } @_;
}

sub update {
    my $lang = shift;
    my $file = shift;
    my ( %Lexicon, %Header);
    my $out = '';

    unless (!-e $file or -w $file) {
        warn "Can't write to $lang, skipping...\n";
        return;
    }

    print "Updating $lang...\n";

    my @lines;
    @lines = (<LEXICON>) if open LEXICON, '<', $file;
    @lines = grep { !/^(#(:|\.)\s*|$)/ } @lines;
    while (@lines) {
        my $msghdr = "";
        $msghdr .= shift @lines while ( $lines[0] && $lines[0] !~ /^(#~ )?msgid/ );
        
        my $msgid  = "";

# '#~ ' is the prefix of launchpad for msg that's not found the the source
# we'll remove the prefix later so we can still show them with our own mark

        $msgid .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgid|")/ );
        my $msgstr = "";
        $msgstr .= shift @lines while ( $lines[0] && $lines[0] =~ /^(#~ )?(msgstr|")/ );

        last unless $msgid;

        chomp $msgid;
        chomp $msgstr;

        $msgid  =~ s/^#~ //mg;
        $msgstr =~ s/^#~ //mg;

        $msgid  =~ s/^msgid "(.*)"\s*?$/$1/m    or warn "$msgid in $file";

        if ( $msgid eq '' ) {
            # null msgid, msgstr will have head info
            $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/ms or warn "$msgstr  in $file";
        }
        else {
            $msgstr =~ s/^msgstr "(.*)"\s*?$/$1/m or warn "$msgstr  in $file";
        }

        if ( $msgid ne ''  ) {
            for my $msg ( \$msgid, \$msgstr ) {
                if ( $$msg =~ /\n/ ) {
                    my @lines = split /\n/, $$msg;
                    $$msg =
                      shift @lines;   # first line don't need to handle any more
                    for (@lines) {
                        if (/^"(.*)"\s*$/) {
                            $$msg .= $1;
                        }
                    }
                }

                # convert \\n back to \n
                $$msg =~ s/(?!\\)\\n/\n/g;
            }
        }

        $Lexicon{$msgid} = $msgstr;
        $Header{$msgid}  = $msghdr;

        validate_msgstr($lang, $msgid, $msgstr);
    }

    my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ );

    foreach my $str ( keys %FILECAT ) {
        $Lexicon{$str} ||= '';
    }
    foreach ( sort keys %Lexicon ) {
        my $f = join ( ' ', sort map $_->[0].":".$_->[1], @{ $FILECAT{$_} } );
        my $nospace = $_;
        $nospace =~ s/ +$//;

        if ( !$Lexicon{$_} and $Lexicon{$nospace} ) {
            $Lexicon{$_} =
              $Lexicon{$nospace} . ( ' ' x ( length($_) - length($nospace) ) );
        }

        next if !length( $Lexicon{$_} ) and $is_english;

        my %seen;
        $out .= $Header{$_} if exists $Header{$_};



        next if (!$f && $_ && !$Lexicon{$_});
        if ( $f && $f !~ /^\s+$/ ) {

            $out .= "#: $f\n";
        }
        elsif ($_) {
            $out .= "#: NOT FOUND IN SOURCE\n";
        }
        foreach my $entry ( sort { $a->[2] cmp $b->[2] } grep { $_->[2] } @{ $FILECAT{$_} } ) {
            my ( $file, $line, $var ) = @{$entry};
            $var =~ s/^\s*,\s*//;
            $var =~ s/\s*$//;
            $out .= "#. ($var)\n" unless $seen{$var}++;
        }
        $out .= 'msgid ' . fmt($_) . "msgstr \"$Lexicon{$_}\"\n\n";
    }

    open PO, '>', $file or die "Couldn't open '$file' for writing: $!";
    print PO $out;
    close PO;

    return 1;
}

sub validate_msgstr {
    my $lang   = shift;
    my $msgid  = shift;
    my $msgstr = shift;

    return if $msgstr eq ''; # no translation for this string
    return if $msgid eq '';  # special case: po file metadata

    # we uniq because a string can use a placeholder more than once
    # (eg %1 %quant(%1, ...) like in our czech localization
    my @expected_variables = uniq($msgid =~ /%\d+/g);
    my @got_variables = uniq($msgstr =~ /%\d+/g);

    # this catches the case where expected uses %1,%2 and got uses %1,%3
    # unlike a simple @expected_variables == @got_variables
    my $expected = join ", ", sort @expected_variables;
    my $got      = join ", ", sort @got_variables;
    return 0 if $expected eq $got;

    warn "$lang string does not have the same placeholders\n";
    warn "expected (" . $expected . ") in msgid: $msgid\n";
    warn "     got (" . $got      . ") in msgstr: $msgstr\n\n";

    return 1;
}

sub escape {
    my $text = shift;
    $text =~ s/\b_(\d+)/%$1/;
    return $text;
}

sub fmt {
    my $str = shift;
    return "\"$str\"\n" unless $str =~ /\n/;

    my $multi_line = ($str =~ /\n(?!\z)/);
    $str =~ s/\n/\\n"\n"/g;

    if ($str =~ /\n"$/) {
        chop $str;
    }
    else {
        $str .= "\"\n";
    }
    return $multi_line ? qq(""\n"$str) : qq("$str);
}
