#!/usr/bin/perl

use strict;
use warnings;

use Chemistry::OpenSMILES::Parser;
use Encode qw( encode );
use File::Basename qw( basename );
use Getopt::Long::Descriptive;

my $basename = basename $0;
my( $opt, $usage ) = describe_options( <<"END" . 'OPTIONS',
USAGE
    $basename [<args>] [<files>]

DESCRIPTION
    $basename reads in SMILES and outputs a summary chemical formula for
    each SMILES entry.

END
    [ 'distinguish-hydrogen-isotopes',
      'output deuterium as D and tritium as T' ],
    [],
    [ 'help', 'print usage message and exit', { shortcircuit => 1 } ],
);

if( $opt->help ) {
    print $usage->text;
    exit;
}

my $errors = 0;
while (<>) {
    chomp;
    utf8::decode( $_ );

    my $additional_position = $1 if s/\t([^\t]*)$//;

    my @moieties;
    my $parser = Chemistry::OpenSMILES::Parser->new;
    eval {
        @moieties = $parser->parse( $_ );
    };
    if( $@ ) {
        $@ =~ s/^[^:]+:\s*// if !index( $@, $0 );
        $additional_position = defined $additional_position
                                     ? ' ' . $additional_position
                                     : '';
        print STDERR sprintf '%s: %s(%d)%s: %s',
                             $0,
                             $ARGV,
                             $.,
                             encode( 'utf8', $additional_position ),
                             encode( 'utf8', $@ );
        $errors++;
        next;
    }

    my %formula;
    for my $moiety (@moieties) {
        for my $atom ($moiety->vertices) {
            my $symbol = ucfirst $atom->{symbol};
            if( $opt->distinguish_hydrogen_isotopes && $symbol eq 'H' && $atom->{isotope} ) {
                $symbol = 'D' if $atom->{isotope} == 2;
                $symbol = 'T' if $atom->{isotope} == 3;
            }
            $formula{$symbol} = 0 unless $formula{$symbol};
            $formula{$symbol}++;
            next unless $atom->{hcount};
            $formula{H} = 0 unless $formula{H};
            $formula{H} += $atom->{hcount};
        }
    }

    my @formula;
    if( exists $formula{C} ) {
        push @formula, 'C' . ($formula{C} == 1 ? '' : $formula{C});
        push @formula, 'H' . ($formula{H} == 1 ? '' : $formula{H}) if exists $formula{H};
        delete $formula{C};
        delete $formula{H};
    }
    for my $element (sort keys %formula) {
        push @formula, $element . ($formula{$element} == 1 ? '' : $formula{$element});
    }

    $additional_position = defined $additional_position
                                 ? "\t" . $additional_position
                                 : '';
    print "@formula$additional_position\n";
}

exit( $errors > 0 );
