package SmilesScripts::Isomorphism;

use strict;
use warnings;

use Chemistry::OpenSMILES qw(
    clean_chiral_centers
    is_chiral_tetrahedral
    is_cis_trans_bond
);
use Chemistry::OpenSMILES::Stereo qw(
    chirality_to_pseudograph
    cis_trans_to_pseudoedges
    mark_all_double_bonds
);
use Chemistry::OpenSMILES::Parser;
use Chemistry::OpenSMILES::Writer qw( write_SMILES );
use Clone qw( clone );
use Data::Dumper;
use Graph;
use Graph::Nauty qw( are_isomorphic canonical_order orbits );
use List::Util qw( any );
use SmilesScripts::DiffMessage qw(
    aggregate_messages
    message
    message_isomorphism
);

use parent Exporter::;
our @EXPORT_OK = qw( smi_compare );

$Graph::Nauty::worksize = 25600;

sub smi_compare($$@)
{
    my( $first_smiles, $second_smiles, $options ) = @_;

    $options = {} unless $options;
    my( $check_isomorphism,
        $remove_alkali_bonds ) =
        ( $options->{check_isomorphism},
          $options->{remove_alkali_bonds} );

    my $pair_moieties_options = { check_isomorphism => $check_isomorphism };

    # Define the comparison strategy
    my $steps = [
        # Since removal of H atoms sometimes also dissolves chiral centers and
        # unsets cis/trans markers, removal of H atoms is performed later.
        { name => 'chirality',   func => \&remove_chirality },
        { name => 'cis/trans',   func => \&remove_bond_order,
                                 args => { orders => [ '/', '\\' ] } },
        { name => 'charge',      func => \&remove_charge },
        { name => 'order',       func => \&remove_bond_order,
                                 args => { orders => [ ':', '=', '#', '$' ] } },
        { name => 'aromaticity', func => \&remove_aromaticity },
        { name => 'H atoms',     func => \&remove_atoms, args => [ 'H' ] },
        { name => 'atom types',  func => \&remove_atom_types },
    ];
    if( $remove_alkali_bonds ) {
        push @$steps, { name => 'alkali bonds', func => \&remove_alkali_bonds };
    }
    my @checks = sort { my $aa = sprintf '%b', $a;
                        my $bb = sprintf '%b', $b;
                        ($a & (2**6))   <=> ($b & (2**6))   || # atom types is the last resort
                        ($a & (2**5))   <=> ($b & (2**5))   || # H atoms is quite destructive too
                        ($aa =~ s/1//g) <=> ($bb =~ s/1//g) || # least changes to the front
                         $a <=> $b }
                 0..(2**@$steps)-1;

    my $first_moiety  = parse_smiles( $first_smiles );
    my $second_moiety = parse_smiles( $second_smiles );

    my $nonmatching_single_moieties_achieved = 0;
    my @invariants;

    COMBINATION:
    for my $checks (@checks) {
        next if any { $checks & $_ } @invariants;

        # There is a problem with Graph 0.9723 causing loss of $.:
        # https://github.com/graphviz-perl/Graph/issues/26
        # Thus we have to go around it.
        my $line_no = $.;
        my $first_moiety_copy  = [ map { copy_moiety( $_ ) } @$first_moiety ];
        my $second_moiety_copy = [ map { copy_moiety( $_ ) } @$second_moiety ];
        $. = $line_no;

        my @modulo;
        for my $i (0..@$steps-1) {
            next unless $checks & (2**$i);
            # Do not perform the comparison unless at least one of the
            # molecules are touched by the simplification
            if( $steps->[$i]{func}( $first_moiety_copy,  $steps->[$i]{args} ) |
                $steps->[$i]{func}( $second_moiety_copy, $steps->[$i]{args} ) ) {
                push @modulo, $steps->[$i]{name};
            } else {
                push @invariants, $checks;
                next COMBINATION;
            }
        }

        eval {
            pair_moieties( $first_moiety_copy,
                           $second_moiety_copy,
                           $pair_moieties_options );
        };
        if( $@ ) {
            print STDERR "$@"; # TODO better reporting here
            next;
        }

        # Checking for single nonmatching moieties
        $nonmatching_single_moieties_achieved |=
            scalar @$first_moiety_copy  == 1 &&
            scalar @$second_moiety_copy == 1;

        # There are still unmatched moieties on both sides, further
        # reductions are needed
        next if @$first_moiety_copy && @$second_moiety_copy;

        my $reason;
        if( @modulo ) {
            $reason = 'isomorphic modulo ' . join( ', ', sort @modulo );
        } else {
            $reason = 'isomorphic';
        }
        if( @$first_moiety_copy || @$second_moiety_copy ) {
            if( $reason eq 'isomorphic' ) {
                $reason =  'isomorphic modulo superfluous moieties';
            } else {
                $reason .= ', superfluous moieties';
            }
            if( $options->{superfluous_moieties_side} ) {
                $reason .= ' on left'  if @$first_moiety_copy;
                $reason .= ' on right' if @$second_moiety_copy;
            }
        }
        return $reason;
    }

    my $reason = 'unknown';
    if( $nonmatching_single_moieties_achieved ) {
        $reason = 'nonmatching single moieties';
    }
    return $reason;
}

sub parse_smiles
{
    my ( $smiles ) = @_;
    my @smiles;
    eval {
        my $parser;
        $parser = Chemistry::OpenSMILES::Parser->new;
        @smiles = $parser->parse( $smiles, { max_hydrogen_count_digits => 2 } );

        for my $moiety (@smiles) {
            # Some versions of Chemistry::OpenSMILES specified H count of 0
            # for atoms represented in square brackets, thus the following is
            # executed to remove them just in case:
            for my $atom ($moiety->vertices) {
                delete $atom->{hcount};
            }
            my @orbits = orbits( $moiety, \&write_SMILES );
            my %orbits;
            for my $orbit (0..$#orbits) {
                for (@{$orbits[$orbit]}) {
                    $orbits{$_} = $orbit;
                }
            }

            my @removed = clean_chiral_centers( $moiety,
                                                sub { $orbits{$_[0]} } );
            next unless @removed;
            warn scalar @removed . ' tetrahedral chiral center(s) with ' .
                 'less than 4 distinct neighbours were removed.' . "\n";
        }
    };
    if( $@ ) {
        $@ =~ s/\.?\n$//;
        die "error parsing '$smiles': $@.\n";
    } else {
        return \@smiles;
    }
}

sub cleanup_empty_moieties
{
    my $removed = 0;
    for (@_) {
        my $count = scalar @$_;
        @$_ = grep { scalar $_->vertices } @$_;
        $removed += $count - scalar @$_;
    }
    if( $removed ) {
        warn "$removed empty moiety(es) removed from both " .
             "SMILES prior to comparison.\n";
    }
}

sub discover_split_moieties
{
    my ( $moieties ) = @_;

    if( $Graph::VERSION < 0.9717 ) {
        die 'Graph 0.9717 or later is needed to correctly split ' .
            'molecular graphs' . "\n";
    }

    my @moieties_now;
    for my $moiety (@$moieties) {
        my @connected = $moiety->connected_components;
        if( @connected == 1 ) {
            push @moieties_now, $moiety;
            next;
        }

        # Split the graph (moiety) in question into graphs each
        # consisting of a connected component. Graph module does
        # not contain a function or method to do so, thus it has
        # to be done using Graph::connected_components()
        for my $i (0..$#connected) {
            my $moiety_now = $moiety->copy;
            for my $j (0..$#connected) {
                next if $i == $j;
                $moiety_now->delete_vertices( @{$connected[$j]} );
            }
            push @moieties_now, $moiety_now;
        }
    }

    @$moieties = @moieties_now;
}

my @alkali_elements = qw( Li Be Na Mg K Ca Rb Sr Cs Ba Fr Ra );

sub remove_alkali_bonds
{
    my ( $moieties ) = @_;

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $edge ($moiety->edges) {
            next unless any { $_ eq $edge->[0]->{symbol} ||
                              $_ eq $edge->[1]->{symbol} } @alkali_elements;
            $moiety->delete_edge( @$edge );
            $changed = 1;
            for my $atom (@$edge) {
                next unless is_chiral_tetrahedral( $atom );
                delete $atom->{chirality};
                delete $atom->{chirality_neighbours};
            }
        }
    }

    return $changed unless $changed;

    discover_split_moieties( $moieties );
    return $changed;
}

sub remove_atoms
{
    my ( $moieties, $atoms ) = @_;

    my $changed = 0;
    my $maybe_split_moiety = 0;
    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            next unless any { ucfirst $vertex->{symbol} eq $_ } @$atoms;
            $maybe_split_moiety |= $moiety->degree( $vertex ) >= 2;
            for my $neighbour ($moiety->neighbours( $vertex )) {
                next unless is_chiral_tetrahedral( $neighbour );
                # All neighbouring chiral tetrahedral atoms have to lose
                # their chirality status
                delete $neighbour->{chirality};
                delete $neighbour->{chirality_neighbours};
            }
            $moiety->delete_vertex( $vertex );
            $changed = 1;
        }
    }

    return $changed unless $changed;

    discover_split_moieties( $moieties ) if $maybe_split_moiety;
    cleanup_empty_moieties(  $moieties );

    return $changed;
}

sub remove_charge
{
    my ( $moieties ) = @_;

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            next if ! exists $vertex->{charge};
            delete $vertex->{charge};
            $changed = 1;
        }
    }

    return $changed;
}

sub remove_chirality
{
    my ( $moieties ) = @_;

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            next if ! exists $vertex->{chirality};
            delete $vertex->{chirality};
            $changed = 1;
        }
    }

    return $changed;
}

sub remove_aromaticity
{
    my ( $moieties ) = @_;

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            next if $vertex->{symbol} eq ucfirst $vertex->{symbol};
            $vertex->{symbol} = ucfirst $vertex->{symbol};
            $changed = 1;
        }
    }

    return $changed;
}

sub remove_bond_order
{
    my ( $moieties, $options ) = @_;

    my $message = 'order';
    my @orders;

    $options = {} unless $options;
    $message = $options->{message} if $options->{message};
    @orders = @{$options->{orders}} if $options->{orders};

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $edge ($moiety->edges) {
            next if !$moiety->has_edge_attributes( @$edge );
            if( @orders &&
                !grep { $moiety->get_edge_attribute( @$edge, 'bond' ) eq $_ }
                      @orders ) {
                next;
            }
            $moiety->delete_edge_attributes( @$edge );
            $changed = 1;
        }
    }

    return $changed;
}

sub remove_atom_types
{
    my ( $moieties ) = @_;

    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            $vertex->{symbol} = 'X';
        }
    }

    return 1;
}

sub pair_moieties
{
    my( $A, $B, $options ) = @_;

    $options = {} unless $options;

    my %A_depictions;
    my %A_quantities;
    for (@$A) {
        my $depiction = canonical_depiction( $_ );

        $A_quantities{$depiction}++;
        push @{$A_depictions{$depiction}}, $_;

        if( $options->{check_isomorphism} &&
            $A_quantities{$depiction} > 1 &&
            !are_isomorphic( unpack_molecular_graph( $A_depictions{$depiction}[-2] ),
                             unpack_molecular_graph( $A_depictions{$depiction}[-1] ),
                             \&depict_unpacked_vertex ) ) {
            warn "graphs for '$depiction' were found to be not isomorphic\n";
        }
    }

    my %B_depictions;
    my %B_quantities;
    for (@$B) {
        my $depiction = canonical_depiction( $_ );

        $B_quantities{$depiction}++;
        push @{$B_depictions{$depiction}}, $_;

        if( $options->{check_isomorphism} &&
            $B_quantities{$depiction} > 1 &&
            !are_isomorphic( unpack_molecular_graph( $B_depictions{$depiction}[-2] ),
                             unpack_molecular_graph( $B_depictions{$depiction}[-1] ),
                             \&depict_unpacked_vertex ) ) {
            warn "graphs for '$depiction' were found to be not isomorphic\n";
        }
    }

    my( $only_A, $only_B, $common ) =
        comm( [ keys %A_depictions ], [ keys %B_depictions ] );
    if( !@$only_A && !@$only_B ) {
        for (@$common) {
            if( $A_quantities{$_} != $B_quantities{$_} ) {
                warn "different number of moieties of '$_', " .
                     "$A_quantities{$_} vs. $B_quantities{$_}\n";
            }

            if( $options->{check_isomorphism} &&
                !are_isomorphic( unpack_molecular_graph( $A_depictions{$_}[0] ),
                                 unpack_molecular_graph( $B_depictions{$_}[0] ),
                                 \&depict_unpacked_vertex ) ) {
                warn "graphs for '$_' were found to be not isomorphic\n";
            }
        }
    }
    @$A = map { @{$A_depictions{$_}} } @$only_A;
    @$B = map { @{$B_depictions{$_}} } @$only_B;
}

sub canonical_depiction
{
    my( $graph, $color_sub ) = @_;

    $color_sub = \&write_SMILES unless $color_sub;

    my $drop_chirality = sub {
        my( $vertex ) = @_;

        return '' unless %$vertex;

        my %atom = %$vertex;
        delete $atom{chirality};
        return $color_sub->( \%atom );
    };

    my $order_sub = sub {
        return exists $_[0]->{number} ? $_[0]->{number} : -1;
    };

    # FIXME: This code is copied from smi_canonicalise. A more effective
    # solution should be found instead of duplicating the code.

    my $copy  = unpack_molecular_graph( $graph );
    my @order = canonical_order( $copy, $drop_chirality, $order_sub );
    my %order;
    for (0..$#order) {
        $order{$order[$_]} = $_;
    }

    # Drop cis/trans markers from the input graph and mark them
    # anew.
    for my $bond ($graph->edges) {
        next unless is_cis_trans_bond( $graph, @$bond );
        $graph->delete_edge_attribute( @$bond, 'bond' );
    }
    mark_all_double_bonds( $graph,
                           sub {
                                if( $copy->has_edge( $_[0], $_[3] ) &&
                                    $copy->has_edge_attribute( $_[0], $_[3], 'pseudo' ) ) {
                                    return $copy->get_edge_attribute( $_[0], $_[3], 'pseudo' );
                                }
                           },
                           sub { return $order{$_[0]} } );

    my $smiles = write_SMILES(
        $graph,
        {
            order_sub => 
                sub {
                    my @sorted = sort { $order{$a} <=> $order{$b} }
                                      keys %{$_[0]};
                    return $_[0]->{shift @sorted};
                },
        } );

    # A.M.: I cannot find a counter-example, thus the following seems
    # reasonable to me. In a SMILES descriptor, one can substitute all
    # '/' with '\' and vice versa, and retain correct cis/trans settings.
    if( $smiles =~ /([\/\\])/ && $1 eq '\\' ) {
        $smiles =~ tr/\/\\/\\\//;
    }

    return $smiles;
}

sub comm
{
    my( $A, $B ) = @_;

    my @A = sort @$A;
    my @B = sort @$B;

    my( @only_A, @only_B, @common );
    while( @A && @B ) {
        if( $A[0] eq $B[0] ) {
            push @common, shift @A;
            shift @B;
            next;
        }

        if( $A[0] lt $B[0] ) {
            push @only_A, shift @A;
            next;
        }

        if( $A[0] gt $B[0] ) {
            push @only_B, shift @B;
            next;
        }
    }
    push @only_A, @A;
    push @only_B, @B;

    return \@only_A, \@only_B, \@common;
}

# "Unpacks" parsed SMILES graph by converting cis/trans bonds to
# pseudoedges and chirality markers to pseudovertices with their own
# pseudoedges.
sub unpack_molecular_graph
{
    my( $graph ) = @_;

    my $copy = $graph->copy;
    for my $bond ($graph->edges) {
        next unless $graph->has_edge_attribute( @$bond, 'bond' );
        $copy->set_edge_attribute( @$bond,
                                   'bond',
                                   $graph->get_edge_attribute( @$bond, 'bond' ) );
    }
    cis_trans_to_pseudoedges( $copy );
    chirality_to_pseudograph( $copy );

    return $copy;
}

sub depict_unpacked_vertex
{
    my( $vertex ) = @_;

    if( ref $vertex eq 'HASH' && exists $vertex->{symbol} ) {
        $vertex = { %$vertex };
        delete $vertex->{chirality};
        return write_SMILES( $vertex );
    }

    return Dumper $vertex;
}

sub copy_moiety
{
    my( $moiety ) = @_;

    # Prior to v0.9723, Graph had a bug in deeply copied graphs with references as vertices.
    # Therefore an ad-hoc method is used to deeply copy graphs on older systems.
    return $moiety->deep_copy if $Graph::VERSION >= 0.9723;

    my @vertices_orig = $moiety->vertices;
    my @vertices_copy = map { clone $_ } @vertices_orig;
    my %orig_to_copy = map { $vertices_orig[$_] => $vertices_copy[$_] }
                           0..$#vertices_orig;

    for my $i (0..$#vertices_copy) {
        next unless $vertices_copy[$i]->{chirality_neighbours};
        @{$vertices_copy[$i]->{chirality_neighbours}} =
            map { $orig_to_copy{$_} }
                @{$vertices_orig[$i]->{chirality_neighbours}};
    }

    my $copy = Graph::Undirected->new( refvertexed => 1 );
    $copy->add_vertices( @vertices_copy );

    for my $edge ($moiety->edges) {
        $copy->add_edge( map { $orig_to_copy{$_} } @$edge );
        if( $moiety->has_edge_attributes( @$edge ) ) {
            $copy->set_edge_attributes( ( map { $orig_to_copy{$_} } @$edge ),
                                        clone $moiety->get_edge_attributes( @$edge ) );
        }
    }

    return $copy;
}

1;
