package Config::Model::Dpkg::Copyright ;

use strict;
use warnings;

use 5.20.0;
use IO::Pipe;

use feature qw/postderef signatures/;
no warnings qw/experimental::postderef experimental::signatures/;

use base qw/Config::Model::Node/;
use Path::Tiny;
use Data::Dumper;

use Config::Model::DumpAsData;
use Dpkg::Copyright::Scanner qw/scan_files __squash __pack_files __create_tree_leaf_from_paths/;
use Software::LicenseUtils;
use Scalar::Util qw/weaken/;
use Storable qw/dclone/;

my $join_path = "\n "; # used to group Files

sub get_joined_path ($self, $paths) {
    return join ($join_path, sort @$paths);
}

sub split_path ($self,$path) {
    return  sort ( ref $path ? @$path : split ( /[\s\n]+/ , $path ) );
}
sub normalize_path ($self,$path) {
    my @paths = $self->split_path($path);
    return $self->get_joined_path(\@paths);
}

my $dumper = Config::Model::DumpAsData->new;

# $args{in} can contains the output of licensecheck (for tests)
sub update ($self, %args) {

    my $files_obj = $self->grab("Files");

    # explode existing path data to track deleted paths
    my %old_split_files;
    my %debian_paths;
    foreach my $paths_str ($files_obj->fetch_all_indexes) {
        my $node = $files_obj->fetch_with_id($paths_str) ;
        my $data = $dumper->dump_as_data( node => $node );

        if ($paths_str =~ m!^debian/!) {
            $debian_paths{$paths_str} = $data;
        }
        else {
            foreach my $path ($self->split_path($paths_str)) {
                $old_split_files{$path} = $data ;
            }
        }
    }

    my ($files, $copyrights_by_id) = scan_files( %args );

    # explode new data and merge with existing entries
    my %new_split_files;
    my @data;
    my %data_keys;
    foreach my $path ( sort keys $files->%* ) {
        my ($c, $l) = $copyrights_by_id->[ $files->{$path} ]->@*;

        my $new_data = dclone (delete $old_split_files{$path} || {} );
        my $old_cop = $new_data->{Copyright};
        my $old_lic = $new_data->{License}{short_name};
        # say "load '$path' with '$c' ('$l') old '$old_cop' ('$old_lic')";
        # clobber old data
        $new_data->{Copyright} = $c if ($c !~ /no-info-found|UNKNOWN/ or not $old_cop);
        $new_data->{License}{short_name} = $l if ($l ne 'UNKNOWN');

        # create an inventory of different file copyright and license data
        my $dumper = Data::Dumper->new([$new_data])->Sortkeys(1)->Indent(0);
        my $datum_dump = $dumper->Dump;
        my $d_key = $data_keys{$datum_dump};
        if (not defined $d_key) {
            push @data,$new_data;
            $d_key = $data_keys{$datum_dump} = $#data;
        }

        # explode path in subpaths and store id pointing to copyright data in there
        __create_tree_leaf_from_paths(\%new_split_files, $path, $d_key);
    }

    # at this point:
    # * @data contains a list of copyright/license data
    # * %new_split_files contains a tree matching a directory tree where each leaf
    #   is an integer index referencing
    #   an entry in @data to get the correct  copyright/license data
    # * %old_split_files contains paths no longer present. Useful to trace deleted files

    my $current_dir = $args{from_dir} || path('.');

    my %preserved_path;
    # warn about old files (data may be redundant or obsolete though)
    foreach my $old_path (sort keys %old_split_files) {
        # put back data matching an existing dir
        if ($old_path eq '*' or ($old_path =~ m!(.*)/\*$! and $current_dir->is_dir($1))) {
            $preserved_path{$old_path} = delete $old_split_files{$old_path};
        }
        else {
            say "Note: '$old_path' was removed from new upstream source";
        }
    }

    $self->_prune_old_dirs(\%new_split_files, \%old_split_files) ;


    # implode files entries with same data index
    __squash(\%new_split_files) ;

    # pack files by copyright id
    my @packed = __pack_files(\%new_split_files);

    # delete existing data in config tree. A more subtle solution to track which entry is
    # deleted or altered (when individual files are removed, renamed) is too complex.
    $files_obj->clear;

    # count license useage to dedice whether to add a global license
    # or a single entry. Skip unknown or public-domain licenses
    my %lic_usage_count;
    map { $lic_usage_count{$_}++ if $_ and not /unknown|public/i}
        map {split /\s+or\s+/, $data[$_->[0]]->{License}{short_name} // ''; }
        @packed ;

    # load new data in config tree
    foreach my $p (@packed) {
        my ($id, @paths) = $p->@*;

        if ($paths[0] =~ /\.$/) {
            if (@paths > 1) {
                die "Internal error: can't have dir path with file path: @paths";
            }
            my $p = $paths[0];
            $p =~ s/\.$/*/;
            my $old_data = delete $preserved_path{$p};
            say "old dir data for $p overridden" if $old_data;
            next;
        };
        my $datum = dclone($data[$id]);
        my $path_str = $self->normalize_path(\@paths);
        my $l = $datum->{License}{short_name};

        next unless $l ;

        my $norm_path_str = $self->normalize_path(\@paths);

        # if full_license is not provided in datum, check global license(s)
        if (not $datum->{License}{full_license}) {
            my $ok = 0;
            my @sub_licenses = split /\s+or\s+/,$l;
            my $lic_count = 0;
            my @empty_licenses = grep {
                my $text = $self->grab_value(qq!License:"$_" text!) ;
                $ok++ if $text;
                $lic_count += $lic_usage_count{$_} // 0 ;
                not $text; # to get list of empty licenses
            } @sub_licenses;

            if ($ok ne @sub_licenses) {
                my $filler = "Please fill license $l from header of @paths";
                if ($lic_count > 1 ) {
                    say "Adding dummy global license text for license $l for path @paths";
                    map { $self->load(qq!License:"$_" text="$filler"!) } @empty_licenses ;

                }
                else {
                    say "Adding dummy license text for license $l for path @paths";
                    $datum->{License}{full_license} = $filler;
                }
            }

        }

        $files_obj->fetch_with_id($path_str)->load_data( $datum );
    }

    # delete global license without text
    my $global_lic_obj = $self->fetch_element('License');
    foreach my $l ($global_lic_obj->fetch_all_indexes) {
        $global_lic_obj->delete($l)
            unless $global_lic_obj->fetch_with_id($l)->fetch_element_value('text');
    }

    # put back preserved data
    foreach my $old_path (sort keys %preserved_path) {
        say "Note: preserving entry '$old_path'" ;
        $files_obj->fetch_with_id($old_path)->load_data( $preserved_path{$old_path} );
    }

    # put back debian data
    foreach my $deb_path (sort keys %debian_paths) {
        $files_obj->fetch_with_id($deb_path)->load_data( $debian_paths{$deb_path} );
    }

    # read a debian/fix.scanned.copyright file to patch scanned data
    my $debian = $current_dir->child('debian'); # may be missing in test environment
    if ($debian->is_dir) {
        $debian->children(qr/fix\.scanned\.copyright$/);
        my @fixes = $current_dir->child('debian')->children(qr/fix\.scanned\.copyright$/);
        say "Note: loading @fixes fixes from copyright fix files" if @fixes;
        foreach my $fix ( @fixes) {
            my @l = grep { /[^\s]/ } grep { ! m!^(#|//)!  } $fix->lines_utf8;
            $self->load( join('',@l) );
        }
    }

    # normalized again after all the modifications
    $self->load("Files:.sort");

    return ''; # improve returned message ?
}

sub _prune_old_dirs ($self, $h, $old_dirs, $path = [] ) {

    # recurse in the data structure
    foreach my $name (sort keys %$h) {
        my $item = $h->{$name};
        if (ref($item)) {
            $self->_prune_old_dirs($item, $old_dirs, [ $path->@*, $name ]);
        }
    }

    # delete current directory entry
    my $dir_path = join('/', $path->@*,'*');
    if ($old_dirs->{$dir_path}) {
        say "Removing old entry $dir_path";
        delete $old_dirs->{$dir_path};
    }
}


sub fill_global_license ($self, $l, $text) {

    #say "Adding global license $l";
    # handle the case where license is something like GPL-2 or GPL-3
    my @names = $l =~ / or / ? split / or /, $l : ($l);

    # try to fill text of a known license
    foreach my $name (@names) {
        my $license_object ;
        eval {
            $license_object = Software::LicenseUtils->new_from_short_name( {
                short_name => $name,
                holder => 'X. Ample'
            }) ;
        };
        if ($license_object) {
            $self->load(qq!License:$name!); # model will fill the text
        }
        else {
            $self->load(qq!License:$name text:"$text"!);
        }
    }
}

1;

__END__

=encoding utf8

=head1 NAME

Config::Model::Dpkg::Copyright - Fill the File sections of debian/copyright file

=head1 SYNOPSIS

 # this modules is used by cme when invoked with this command
 $ cme update dpkg-copyright

=head1 DESCRIPTION

This commands helps with the tedious task of maintening
C<debian/copyright> file. When you package a new release of a
software, you can run C<cme update dpkg-copyright> to update the
content of the copyright file.

This command scans current package directory to extract copyright and
license information and store them in the Files sections of
debian/copyright file.

In debian package directory:

* run 'cme update dpkg-copyright' or 'cme update dpkg'
* check the result with your favorite VCS diff tool. (you do use
  a VCS for your package files, do you ?)

Note: this command is experimental.

=head1 Tweak results

Since the extraction of copyright information from source file is
based on comments, the result is sometimes lackluster. Your may
specify instruction to alter or set specific copyright entries in
C<debian/fix.scanned.copyright> file
(or C<< debian/<source-package>.fix.scanned.copyright >>).
Each line of this file will be handled
by L<Config::Model::Loader> to modify copyright information.

=head2 Example

If the extracted copyright contains:

 Files: *
 Copyright: 2014-2015, Adam Kennedy <adamk@cpan.org> "foobar
 License: Artistic or GPL-1+

You may add this line in C<debian/fix.copyright> file:

 ! Files:'*' Copyright=~s/\s*".*//

This way, the copyright information will be updated from the file
content but the extra C<"foobar> will always be removed during
updates.

Comments are accepted in Perl and C++ style from the beginning of the line.
Lines breaks are ignored.

Here's another more complex example:

 // added a global license, MIT license text is filled by Config::Model
 ! copyright License:MIT

 # don't forget '!' to go back to tree root
 ! copyright Files:"pan/general/map-vector.h" Copyright="2001,Andrei Alexandrescu"
   License short_name=MIT
 # delete license text since short_name points to global  MIT license
   full_license~

 # use a loop there vvvvvv to clean up that vvvvvvvvvvvvvvvvvvvvvvv in all copyrights
 ! copyright   Files:~/.*/     Copyright=~s/all\s*rights\s*reserved//i

 # defeat spammer by replacing all '@' in emails of 3rdparty files
 # the operation :~/^3party/ loops over all Files entries that match ^3rdparty
 # and modify the copyright entry with a Perl substitution
 ! Files:~/^3rdparty/ Copyright=~s/@/(at)/

=head1 Under the hood

This section explains how cme merges the information from the existing
C<debian/copyright> file (the "old" information) with the information
extracted by I<licensecheck> (the "new" information):

=over

=item *

The old and new information are compared in the form of file lists:

=over

=item *

New file entries are kept as is in the new list.

=item *

When a file entry is found in both old and new lists, the new © and
license short names are checked. If they are unknown, the information
from the old list is copied in the new list.

=item *

Old files entries not found in the new list are deleted.

=back

=item *

File entries are coalesced in the new list to reduce redundancies (this mechanism is explained in this L<blog|https://ddumont.wordpress.com/2015/04/05/improving-creation-of-debian-copyright-file>)

=item *

License entries are created, either attached to Files specification or as global licenses. License text is added for known license (actually known by L<Software::License>)

=item *

Directories (path ending with C</*>) from old list then checked:

=over

=item *

Directory is found in the new list: the old information is clobbered by new information.

=item *

Directory not found in new list but exists: the old information is copied in the new list.

=item *

Directory is not found: the old information is discarded

=back

=item *

Files entries are sorted and the new C<debian/copyright> is generated.

=back


=head1 AUTHOR

Dominique Dumont <dod@debian.org>

=cut
