#! /usr/bin/perl -w

# vim:syntax=perl

=pod

=head1 NAME

nms2dlf - convert Netscape Messaging Server SMTP log files to the email DLF

=head1 SYNOPSIS

B<nms2dlf> STDIN STDOUT

=head1 DESCRIPTION

This program converts Netscape Messaging Server log file generated by
the SMTP service to the email DLF.

To process correctly the log file, you need to turn on logging of the
following modules:

    - SMTP-Accept
    - SMTP-Deliver
    - Error-Handler
    - Mailbox-Deliver (Need to be enabled)

=head1 LIMITATIONS

This DLF converter was developed for the Netscape Messaging Server
version 4.1. Other versions may or may not work. Contact the LogReport
developers if you have problems with that converter.

You can find information about the log format used by Netscape
Messaging Server at the following URL:

    http://developer.netscape.com/docs/manuals/messaging/41/ag/logging.htm

Not all messages are documented. And we found errors in the
documentation. The fields msgID and mailFrom are inversed in both
SMTP-Accept and SMTP-Deliver from what described the documentation.

=head2 CONFIGURING LOGGING OF MAILBOX-DELIVER MODULE

We found that there are no logs from the Mailbox-Delivery module by
default. In order to fix that problem please make sure you do the
following:

=over

=item 1

Go on the Netscape Messaging Server 4 MTA or Message Store

=item 2

Become the Mail Server user (usually this should not be root but
typically a user like mailsrv or whatever was defined at installation
time, check for the ownership of the files in your message server
instance for example)

=item 3

Go to the right location, something like: I<server-root>/msg-instance

For example it could be: F</usr/netscape/server4/msg-mymailserver>

=item 4

Do:

    $ ./configutil -o service.smtp.mailbox-deliver.log -v yes
    OK SET

=item 5

Then become root, go again to your instance directory like:
F</usr/netscape/server4/msg-mymailserver> and do:

    # ./stop-msg smtp
    /usr/netscape/server4: Stopping SMTP daemon 16279 .... done: 16279

    # ./start-msg smtp
    /usr/netscape/server4: Starting SMTP daemon ..... done: 10820


=back

And then you will start seeing lines which will look like the following:

    [19/Jul/2002:08:31:54 +0200] amail2 smtpd[10820]: \
    General Notice: SMTP-Accept:GZHGT501.C01:<200207190632.g6J6W7Y01389@\
    esmtp.orangemail.ch>:[192.168.30.2]:192.168.30.2: \
    <newsletter@gamezone.com>:19514:1:<hmarmy@orangemail.ch>
    [19/Jul/2002:08:31:54 +0200] amail2 smtpd[10820]: General Notice: \
    Mailbox-Deliver:GZHGT501.C01:<200207190632.g6J6W7Y01389@\
    esmtp.orangemail.ch>:19516:1:hmarmy;

If this was not done the line with the pattern Mailbox-Deliver above
will not be present and the program will give wrong results. A warning
will be output in the error log.

=head1 EXAMPLES

To process a log as produced by Netscape Messaging Server

 $ nms2dlf < mail.log

nms2dlf will be rarely used on its own, but is more likely
called by lr_log2report:

 $ lr_log2report nms < /var/log/mail.log

=head1 VERSION

$Id: nms2dlf.in,v 1.10 2006/07/23 13:16:34 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2002 Stichting LogReport Foundation <logreport@LogReport.org>
Copyright (C) 2002 Arnaud Taddei
Copyright (C) 2002 Arnaud Gaillard

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.

=head1 AUTHORS

Francis J. Lacoste <flacoste@logreport.org>,
Arnaud Taddei <Arnaud.Taddei@sun.com> and
Arnaud Gaillard <wireless@orange.ch>

=cut

use strict;
use lib '/usr/share/perl5';
use Lire::DlfSchema;
use Lire::Email qw/sanitize splitemailadress splitrelay/;
use Lire::Syslog;
use Lire::Program qw/ :msg :dlf /;

# The following variables are global to the entire script. In particular:
#   @accept_queue        this array lists the references of the 
#                        messages tables for which we have 
#                        an SMTP ACCEPT line in the log. This allows the
#                        detection of orphan lines especially at the 
#                        beginning of the log
#   
#   %deferred           a table which has deferred messages as values.
#   
#   $error_on_msg
#   
#   $error_envid
#   
#   $start_time
#
#   $dlf_maker
#
#   $dlflines

use vars qw/ @accept_queue %deferred $error_on_msg $error_envid %error_headers 
             @hed_list $hed_index
	     $start_time $dlf_maker $dlflines 
             $debug $mailbox_deliver_check/;

$mailbox_deliver_check = 0;

#-----------------------------------------------------------------------
#  Function print_dlf

#  This program takes a reference to a message table of the form

#     'delay'           => 1
#     'from'            => '<root@amail1.iorange.ch>'
#     'from_domain'     => 'amail1.iorange.ch'
#     'from_relay_host' => '192.168.40.120'
#     'from_relay_ip'   => '192.168.40.120'
#     'from_user'       => 'root'
#     'logrelay'        => 'amail1'
#     'msgid'           => '<20020219230013.AAA23658+3977504@amail1.iorange.ch>'
#     'nrcpt'           => 1
#     'queueid'         => 'GRSYKQ01.900'
#     'rcpt'            => '<root@amail1.iorange.ch>'
#     'size'            => 839
#     'stat'            => 'unknown error'
#     'time'            => 1014159626
#     'to_relay_host'   => 'localhost'

#  For each rcpt element it will:

#  - sanitize the recipient address 
#  - split the recipient address into 2 new attributes of the message
#    table above:
#      * the recipient user part called 'to_user' 
#      * the recipient domain part called 'to_domain'
#  - Once this is done, it builds a DLF record called $rec, using the
#    dlf_maker object based on the message table that we just updated
#    with the to_user and to_domain fields. This means that we create a
#    list of 17 items, typically:

#     0  1022709631
#     1  'amail1'
#     2  'GWW7SU00.L01'
#     3  '<20020529220015.AAA8615+3977504@amail1.iorange.ch>'
#     4  'root'
#     5  'amail1.iorange.ch'
#     6  '192.168.40.120'
#     7  '192.168.40.120'
#     8  838
#     9  0
#     10  'LIRE_NOTAVAIL'
#     11  'root'
#     12  'amail1.iorange.ch'
#     13  'localhost'
#     14  'LIRE_NOTAVAIL'
#     15  'unknown_error'
#     16  'LIRE_NOTAVAIL'
#  - eventually we print the join of the elements of the record into the
#    STDOUT which is likely to be an intermediary DLF file.

sub print_dlf {
    my ( $msg ) = @_;

    foreach my $to ( split ",", $msg->{rcpt} ) {
	my $email;
	sanitize( "emailadress", $to, $email );
	( $msg->{to_user}, $msg->{to_domain} ) = splitemailadress( $email);

	my $rec = $dlf_maker->( $msg );
	my @rec = @$rec;
	# FIXME: When can this happen?
	if ($#rec < 0) {
	    trace( "*** ERROR in PRINT DLF 0 fields" );
	} else {
	    print join( " ", @rec ), "\n";
	}
	$dlflines++;
    }
}

#-----------------------------------------------------------------------
#  Function print_deferred_messages

#  This function looks for the messages which are in the deferred table.
#  For each of these messages, the delay attribute of each message is
#  calculated and the message is printed to the DLF file.

sub print_deferred_messages {
    my ( $time ) = @_;

    foreach my $msg ( values %deferred ) {
	$msg->{delay} = $time - $msg->{time};
	print_dlf( $msg );
    }

    %deferred = ();
}

#-----------------------------------------------------------------------
#  Function Print_Server_Messages 
#  A function to dump server messages

sub Print_Server_Messages {
    my($list) = @_;

    lr_debug( <<EOT );
Server Messages
---------------

EOT

    foreach my $e (@$list) {
	lr_debug( $e );
    }

    lr_debug( "" );
}

#-----------------------------------------------------------------------
#  Function find_msg_in_accept_queue

#  This function returns a message reference if the message-id field of
#  the latter and the from field are EQUAL to the $msgid and $from
#  parameters given.

#  If there is no pattern it returns -1;

sub find_msg_in_accept_queue {
    my ( $msgid, $from ) = @_;

    for (my $i=@accept_queue; $i > 0; $i--) {
	my $msg = $accept_queue[$i-1];
	if ( $msg->{msgid} eq $msgid && $msg->{from} eq $from )
	{
	    return $i - 1;
	}
    }

    return -1;
}

#-----------------------------------------------------------------------

#  Function find_msg_in_accept_queue_by_qid

#  This function returns a message reference if the message-id field of
#  the latter and the from field are EQUAL to the qid and msgid

#  If there is no pattern it returns -1;

sub find_msg_in_accept_queue_by_qid {
    my ( $msgid, $qid ) = @_;

    for (my $i=@accept_queue; $i > 0; $i--) {
	my $msg = $accept_queue[$i-1];
	if ( $msg->{msgid} eq $msgid && $msg->{queueid} eq $qid )
	{
	    return $i - 1;
	}
    }

    return -1;
}

#-----------------------------------------------------------------------

#  Function correct_fields

#  Since splitting the record is 81% faster than parsing it using
#  m//, we use a error detection and correction schema rather than
#  using the solution which is more expensive but give always the
#  good result.

#  Benchmark: timing 50000 iterations of parse_match, parse_split...
#  parse_match:  3 wallclock secs ( 2.81 usr +  0.00 sys =  2.81 CPU) @ 17793.59/s (n=50000)
#  parse_split:  2 wallclock secs ( 1.55 usr +  0.00 sys =  1.55 CPU) @ 32258.06/s (n=50000)
#                 Rate parse_match parse_split
#  parse_match 17794/s          --        -45%
#  parse_split 32258/s         81%          --

sub correct_fields {
    my ( $fields, $module, $expected ) = @_;

    my $str = join ":", @$fields;
    $str =~ s/\s+\(.*\)//g;  # Indeed we have field elements like:
                             # <3D17A93C00282D9E@freesurfmta05.sunrise.ch>
			     # (added by postmaster@freesurf.ch)
    my @fields = $str =~ m/((?:<[^>]+?>,?)+|[^:]+)/g;

    if (@fields != $expected) {
      die "invalid $module record: should contains $expected fields but ",
	"has ",	scalar @fields, " after correction\n";
    }

    return \@fields;
}

#-----------------------------------------------------------------------
# Function smtp_accept_event

sub smtp_accept_event {
    my ( $log, $fields ) = @_;

    # Fields order is
    # envelopeID msgID peerAddress peerHost mailFrom msgSize \
    # numRecipients recipientList
    # In the documentation, msgID and mailFrom are switched.
    $fields = correct_fields( $fields, "SMTP-Accept", 8 )
      if @$fields != 8;

    my $from;
    sanitize( "emailadress", $fields->[4], $from );

    my ( $user, $host ) = splitemailadress( $from );

    my $dlf = {
	       time	    => $log->{timestamp},
	       logrelay	    => $log->{hostname},
	       queueid	    => $fields->[0],
	       msgid	    => $fields->[1],
	       from	    => $fields->[4],
	       from_user    => $user,
	       from_domain  => $host,
	       size	    => $fields->[5],

	       # Non DLF mapped fields
	       nrcpt	     => $fields->[6],
	       rcpt	     => $fields->[7],
	      };
    sanitize( "relayhost", $fields->[3], $dlf->{from_relay_host} );
    sanitize( "relayip", $fields->[2], $dlf->{from_relay_ip} );

    push @accept_queue, $dlf;

    # Check if the accept_queue seems to be growing without bounds.
    # If we have more than 50 messages pending on the @accept_queue,
    # it is really probable that logging in the Mailbox-Deliver module
    # wasn't turned on.
    if ( !$mailbox_deliver_check && @accept_queue > 50 ) {
	$mailbox_deliver_check = 1;
	lr_warn( "More than 50 incoming messages not yet delivered. Did you turned on service.smtp.mailbox-deliver.log?" );
    }
}

#-----------------------------------------------------------------------
# Function smtp_deliver_event

sub smtp_deliver_event {
    my ( $log, $fields ) = @_;

    # Fields order is:
    # envelopeID msgID status destHost mailFrom msgSize \
    # numRecipients recipientList
    # In the documentation, msgID and mailFrom are switched.
    $fields = correct_fields( $fields, "SMTP-Deliver", 8 )
      if @$fields != 8;

    my $msg = $deferred{$fields->[0]};
    unless ( defined $msg ) {
	my $i = find_msg_in_accept_queue( $fields->[1], $fields->[4] );
	if ( $i >= 0 ) {
	    smtp_relayforward_event( $log, $fields, $i );
	    return;
	}

	my $email;
	sanitize( "emailadress", $fields->[4], $email );
	my ( $user, $host ) = splitemailadress( $email );

	# This deliver event doesn't have any corresponding SMTP-Accept
	# It's either a bounce or its SMTP-Accept event was in a previous
	# log file
	$msg = {
		logrelay    => $log->{hostname},
		queueid	    => $fields->[0],
		msgid	    => $fields->[1],
		from_user   => $user,
		from_domain => $host,
		size	    => $fields->[5],
		# Non DLF mapped fields
		nrcpt	    => $fields->[6],
		rcpt	    => $fields->[7],
	       };

	if ( $fields->[4] eq '<>' ) {
	    # Bounce
	    $msg->{time}	    = $log->{timestamp};
	    $msg->{from_relay_host} = "localhost";
	    $msg->{from_relay_ip}   = "127.0.0.1";
	} else {
	    # Log file rotation
	    $msg->{time}	    = $start_time;
	    # from_relay_host and from_relay_ip are UNKNOWN
	}
    }

    sanitize( "relayhost", $fields->[3], $msg->{to_relay_host} );

    if ( $fields->[2] eq 'Delivered' ) {
	$msg->{stat}  = 'sent';
	$msg->{delay} = $log->{timestamp} - $msg->{time};
	$msg->{nrcpt} = $fields->[6];
	$msg->{rcpt}  = $fields->[7];

	print_dlf( $msg );
    } else {
	$msg->{stat} = 'deferred';
	$deferred{$fields->[0]} = $msg;
    }
}

#-----------------------------------------------------------------------
# Function smtp_replayforward_event

sub smtp_relayforward_event {
    my ( $log, $fields, $accept_idx ) = @_;

    my @forward = ();
    my @relay   = ();
    my $or_msg  = $accept_queue[$accept_idx];

    # Try to find each recipient in the original recipient list
    # All recipient that can't be found is a forward (alias)
    my @or_rcpt = split ",", $or_msg->{rcpt};
    foreach my $to ( split ",", $fields->[7] ) {
	if ( grep { $_ eq $to } @or_rcpt ) {
	    push @relay, $to;
	} else {
	    push @forward, $to;
	}
    }

    # Remove from accept queue if all relay recipients covers the original
    # message recipient list
    if ( @relay >= $or_msg->{nrcpt} ) {
	splice @accept_queue, $accept_idx, 1;
    } else {
	# Remove the relay recipients from the msg on the accept_queue
	# so that they aren't marked as delivered locally later on
	my @new_rcpt = ();
	foreach my $to ( split ",", $or_msg->{rcpt} ) {
	    # Destination will be equal in the case of a relay
	    next if grep { $_ eq $to } @relay;
	    push @new_rcpt, $to;
	}
	$or_msg->{rcpt} = join ",", @new_rcpt;
	$or_msg->{nrcpt} = @new_rcpt;
    }

    if ( @relay ) {
	# Create by copying
	my %dlf = %$or_msg;

	sanitize( "relayhost", $fields->[3], $dlf{to_relay_host} );
	$dlf{size} = $fields->[5];

	if ( $fields->[2] eq 'Delivered' ) {
	    $dlf{stat}  = 'sent';
	    $dlf{delay} = $log->{timestamp} - $or_msg->{time};
	    $dlf{nrcpt} = @relay;
	    $dlf{rcpt}  = join ",", @relay;

	    print_dlf( \%dlf );
	} else {
	    $dlf{stat} = 'deferred';
	    $deferred{$fields->[0]} = \%dlf;
	}
    }

    if ( @forward ) {
	my %dlf = (
		   time	    => $log->{timestamp},
		   logrelay => $log->{hostname},
		   queueid  => $fields->[0],
		   msgid    => $fields->[1],
		   from	    => $fields->[4],
		   from_user	=> $or_msg->{from_user},
		   from_domain	=> $or_msg->{from_domain},
		   size	    => $fields->[5],
		   from_relay_host  => "localhost",
		   from_relay_ip    => "127.0.0.1",

		   # Non DLF mapped fields
		   nrcpt     => scalar @forward,
		   rcpt	     => join( ",", @forward),
		  );

	sanitize( "relayhost", $fields->[3], $dlf{to_relay_host} );

	if ( $fields->[2] eq 'Delivered' ) {
	    $dlf{stat}  = 'sent';
	    $dlf{delay} = 0;

	    print_dlf( \%dlf );
	} else {
	    $dlf{stat} = 'deferred';
	    $deferred{$fields->[0]} = \%dlf;
	}
    }
}

#-----------------------------------------------------------------------
# Function smtp_deny_event

sub smtp_deny_event {
    my ( $log, $msg ) = @_;

    my ( $host, $ip ) = $msg =~ /Denied TCP access to (\S+) \[([\d.]+)\]/
      or die "can't extract host and ip from Denied TCP event\n";

    my $dlf = $dlf_maker->( {
			     time		=> $log->{timestamp},
			     logrelay		=> $log->{hostname},
			     from_relay_host	=> $host,
			     from_relay_ip	=> $ip,
			     to_relay_host	=> "localhost",
			     stat		=> "denied tcp access",
			    } );
    print join( " ", @$dlf ), "\n";
    $dlflines++;
}

#-----------------------------------------------------------------------
# Function Handler_Error_Event
#
# This function is called when an Handler Error Event line is parsed in
# the log

sub Handler_Error_Event {
    my ( $log, $fields ) = @_;

    trace( "*** Handler_Error_Event IN: $log" );

    # Fields error
    #	envelopeID mailFrom size msgID
    $fields = correct_fields( $fields, "ErrorHandler", 4 )
      if @$fields != 4;

    # Find the message to which this error is related on the
    # accept_queue
    my $i = find_msg_in_accept_queue( $fields->[3], $fields->[1] );
    die "error_event: can't find message $fields->[3] in \@accept_queue\n"
      if $i < 0;

    $error_envid    = $fields->[0];
    $error_on_msg   = $i;
    Update_HED('error_on_msg',$i);
    Update_HED_On_Index($hed_index,'message-id', $error_envid);

    trace( "*** Handler_Error_Event OUT!" );
    Debug();
}

#-----------------------------------------------------------------------
# Function Is_Orphan_Handler_Error

# This function is testing if the General Error: ... dump line is
# orphan or not.

sub Is_Orphan_Handler_Error {
    if (defined $error_on_msg) {
	return 0;
    } else {
	return 1;
    }
}

#-----------------------------------------------------------------------
# Function HED_Canonicalisation

#  The NMS4 logs dump errors in the form

#     General Error:   Attribute: Value
#  or General Error:   Attribute! Value

#  For the second form we need to canonicalise it back to the first form.
#  This is the goal of this function.

#  In addition we sanitise and trim the values.

sub HED_Canonicalisation {
    my($hed_attribute, $hed_value) = @_;

    if ($hed_attribute =~ /^\s*([-\w]+)! (.*)/) {
	$hed_attribute = $1;
	$hed_value     = $2 . $hed_value if defined $2;
    }

    $hed_attribute =~ s/^\s*//;
    $hed_value     =~ s/\s*$//;
    if (defined $hed_value ) {
	$hed_value =~ s/^\s*//;
	$hed_value =~ s/\s*$//;
    }
    $hed_attribute = lc $hed_attribute;

    return ($hed_attribute, $hed_value);
}

#-----------------------------------------------------------------------
# Function Is_HED_End
#
# A function to test if the HED is finished

sub Is_HED_End {
    my($hed_attribute) = @_;
    if ( $hed_attribute =~ /^\s*$/ ) {
	return 1;
    } else {
	return 0;
    }
}

#-----------------------------------------------------------------------

sub sortbynmuber {
    my($a,$b);
    return $a <=> $b;
}

#-----------------------------------------------------------------------

#  Function Update_HED

#  A function to update the HED Attribute and Value in the HED_table.

sub Update_HED {
    my($hed_attribute, $hed_value) = @_;

    trace( "*** Update_HED IN: att: $hed_attribute val: $hed_value" );

    my ($i, $new_hed, $value);

    if ($hed_attribute eq 'trace') {
	for ($i = 0; $i < $#hed_list + 1; $i++) {
	    $value = Get_HED($i,'trace');
	    if (defined $value) {
		my(@l) = @{${$hed_list[$i]}{'trace'}};
		if ($#l < 2) {
		    push (@{${$hed_list[$i]}{'trace'}},$hed_value);
		    last;
		} else {
		    next;
		}
	    } else {
		push (@{${$hed_list[$i]}{'trace'}},$hed_value);
		last;
	    }
	}
    } elsif ($hed_attribute eq 'channel-to') {
	for ($i = 0; $i < $#hed_list + 1; $i++) {
	    $value = Get_HED($i,'channel-to');
	    if (defined $value) {
		my(@l) = @{${$hed_list[$i]}{'channel-to'}};
		if ($#l < 2) {
		    push (@{${$hed_list[$i]}{'channel-to'}},$hed_value);
		    last;
		} else {
		    next;
		}
	    } else {
		push (@{${$hed_list[$i]}{'channel-to'}},$hed_value);
		last;
	    }
	}
    } elsif ($hed_attribute eq 'message-id') {
	for ( $i = 0; $i < $#hed_list + 1; $i++) {
	    $value = Get_HED($i, $hed_attribute);
	    if (defined $value && $value eq $hed_value) {
		trace( "*** Update_HED    QID $hed_value already exists ",
		       "for index $hed_index");
		return;
	    }
    }
	$hed_index = Add_HED($hed_attribute, $hed_value);
    } else {
	$new_hed = 1;
	for ( $i = 0; $i < $#hed_list + 1; $i++) {
	    $value = Get_HED($i, $hed_attribute);
	    if (defined $value) {
		next;
	    } else {
		${$hed_list[$i]}{$hed_attribute} = $hed_value;
		$hed_index                       = $i;
		$new_hed                         = 0;
		last;
	    }
	}
	$hed_index = Add_HED($hed_attribute, $hed_value) if ($new_hed);
    }

    trace( "*** Update_HED OUT:" );
    Debug();
}

#-----------------------------------------------------------------------
#  Function Update_HED_On_Index
#
#  Update HED based on an HED index

sub Update_HED_On_Index {
    my ($pos,$hed_attribute,$hed_value) = @_;

    trace( "*** Update_HED_On_Index IN: pos: $pos att: ",
	   $hed_attribute, " val: ", $hed_value );

    ${$hed_list[$pos]}{$hed_attribute} = $hed_value;

    trace( "*** Update_HED_On_Index OUT" );
}


#-----------------------------------------------------------------------
#  Function Add_HED
#
#  A function to add a new HED in the error list

sub Add_HED {
    my($hed_attribute, $hed_value) = @_;

    trace( "*** Add_HED IN: att: $hed_attribute val: $hed_value" );

    my %hed = ($hed_attribute, $hed_value);
    $hed_list[$#hed_list + 1] = \%hed;

    trace( "*** Add_HED OUT" );
    Debug();

    return ($#hed_list);
}

#-----------------------------------------------------------------------
# Function Get_HED
#
# A function to retrieve a value for a given message in the HED table
# and for a given attribute
sub Get_HED {
    my($position, $attribute) = @_;

    my $hed;

    $hed = $hed_list[$position];
    return $$hed{$attribute};
}

#-----------------------------------------------------------------------
# Function shift2

sub shift2 {
    my($array) = @_;

    return splice(@{$array},0,1);
}

#-----------------------------------------------------------------------
#  Function Remove_HED
#
#  A function to remove an HED from the error list

sub Remove_HED {
    my($position) = @_;

    trace( "*** Remove_HED IN: $position" );

    if ($position == 0) {
	@hed_list = pop(@hed_list);
    } else {
	splice @hed_list, 0, $position;
    }

    # &Debug;
}

#-----------------------------------------------------------------------
#  Function HED_Event
#
#  When a line is of the form 
#
#      General Error:    attribute: value
#  or  General Error:    attribute! value
#
#  this function is called and it is going to do several things:
#
#
#  - If the line is an orphan line (i.e. there is no error message that
#     was identified before, for example because the log rotation put the
#     previous information in another file) we return which means we skip
#     the line. Actually we should put it into a continuation mechanism
#     (for the future)
#
#  we are going to store the information in an Handler_HED_table which
#  will store:
#
#     index_number    -->   Table   
#                       Attribute    Value
#                       Attribute    Value
#
#  What we need to be careful with
#
#  - the Handler Error Dump Events can be intermixed for different
#    messages and the difficulty is to make sure we detect correctly to
#    which QID the event line belongs.
#
#  - to finish an Handler Error Dump and then once the Dump is done, that
#    we signal it to the calling program to call the processing of the
#    error
#
#
#  The function receives 2 arguments:
#      - $dump_attribute: This is the attribute name
#      - $dump_value:     This is the value name

sub HED_Event {
    my ( $log, $hed_attribute, $hed_value ) = @_;

    trace( "*** HED_Event IN: $log $hed_attribute $hed_value" );

    if ($hed_value =~ /Logging Error:(.*):SMTP-Router:(.*)/) {
	my($qid)   = $1;
	my($error) = $2;
	return;
    } else {
	# Here we make sure that the line is not an orphan line
	return if Is_Orphan_Handler_Error();

	# Handle Channel-To! form of the attribute and canonicalise attributes and values
	($hed_attribute, $hed_value) = HED_Canonicalisation($hed_attribute,
							    $hed_value);

	# Here we test if we reached the end of an HED and then we
	# process the error because it means we have everything to do
	# it and produce a DLF entry
	if ( Is_HED_End($hed_attribute)) {
	    Process_HED( $log->{timestamp} );
	    Remove_HED(0);
	}

	# Now for each of the QID messages which are in the HED_table
	# We assume that the threads are working in sequence so if we
	# read a line and we read the error tables, the first QID for
	# which we find that the attribute is empty is the good one.
	#
	# Of course some attributes are multivalued.
	Update_HED($hed_attribute,$hed_value);
    }

    trace( "*** HED_Event OUT!" );
    Debug();
}

#-----------------------------------------------------------------------
# Function Process_Unknown_Error
sub Process_Unknown_Error {
    my ($time ) = @_;

    trace( "*** Process_Unknown_Error IN: $error_on_msg" );

    my $msg = $accept_queue[$error_on_msg];

    $msg->{delay}	    = $time - $msg->{time};
    $msg->{stat}	    = "unknown error";
    $msg->{to_relay_host}   = "localhost";

    # Mark all recipients as having an error, altough this isn't
    # necessarly the case
    print_dlf( $msg );

    splice @accept_queue, $error_on_msg, 1;

    $error_on_msg = undef;
    $error_envid  = undef;

    trace( "*** Process_Unknown_Error OUT!" );
    Debug();
}

#-----------------------------------------------------------------------
# Function extract_rcpt
sub extract_rcpt {
    return undef unless defined $_[0];
    return $_[0] =~ /^SMTP\s+(<.+>)$/;
}

#-----------------------------------------------------------------------
# Function Process_HED
#
# The function that extracts information from an HED when this is a
# complete one.
sub Process_HED {
    my ($time ) = @_;

    trace( "*** Process_HED IN: $time $error_on_msg" );

    my ($i,$msg,@tos,@stat,$channel_to,$diagcode,$account_to,$host_from);

    $msg = $accept_queue[$error_on_msg];

    if (defined Get_HED($hed_index, 'error_on_msg')) {
	# Here we used to do a sanity check
	# The original code was:
	# die "inconsistent message ID. Expected $error_envid. Found ",
	#   $error_headers{'message-id'}, "\n"
	#	    unless $error_headers{'message-id'} eq $error_envid;
    } else {
	Process_Unknown_Error($time);
	return;
    }

    $channel_to = Get_HED($hed_index, 'channel-to');
    $diagcode   = Get_HED($hed_index, 'diagnostic-code');
    $account_to = Get_HED($hed_index, 'account-to');
    $host_from  = Get_HED($hed_index, 'host-from');
    if ( ref $channel_to) {
	for ( $i=0; $i < @{$channel_to}; $i++ ) {
	    my $to = extract_rcpt( ${$channel_to}[$i] );
	    $to    ||= $account_to;
	    push @tos, $to;
	}
	push(@stat,$diagcode);
    }

    if (! defined $msg->{from_relay_ip} && defined $host_from) {
      my $relay;
      sanitize( "relay", $host_from, $relay );
      ( $msg->{from_relay_host}, $msg->{from_relay_ip} ) =
	splitrelay( $relay );
    }

    my $rcpt    = $msg->{rcpt};
    my %handled = ();
    $msg->{to_relay_host} = "localhost";
    for ($i=0; $i < @tos; $i++ ) {
	my $to	      = $tos[$i];
	my $stat      = lc $stat[$i];
	$stat         =~ s/^\d*\s*//; # Remove error code
	$msg->{stat}  = $stat;
	$msg->{delay} = $time - $msg->{time};

	$msg->{nrcpt}--;
	$msg->{rcpt}  = $to;
	$handled{$to} = 1;

	print_dlf( $msg );
    }

    if ( $msg->{nrcpt} <= 0 ){
	splice @accept_queue, $error_on_msg, 1
    } else {
	# Remove recipients which had an error
	my @new_rcpt = grep { ! $handled{$_} } split ",", $rcpt;
	$msg->{rcpt} = join ",", @new_rcpt;
	$msg->{nrcpt} = @new_rcpt;
    }

    trace( "*** Process_HED OUT" );
    Debug();
}

#-----------------------------------------------------------------------
# Function Mailbox_Deliver
#
# A function to fix the delivery of messages to a local mailbox. We can
# avoid the 10s heuristic if the right logging level is done in the
# configutil of the NMS4 log.
#
# The entry looks like:
#
# 'General Notice: Mailbox-Deliver:GZAI2W00.G14:<OFDD3CC8CF.687C3DCD-ONC1256BF7.004348C2@dataconsulting.local>:2419:1:C.Tenthorey;'

sub Mailbox_Deliver {
    my($fields,$module,$log) = @_;

    my($msgidx);

    # Mailbox-Deliver logging was enabled
    $mailbox_deliver_check ||= 1;

    # We parse the entry
    $fields = correct_fields($fields,'Mailbox-Deliver',5);

    # We find the message in accept_queue
    $msgidx = find_msg_in_accept_queue_by_qid($$fields[1],$$fields[0]);
    if ($msgidx != -1) {
	# We should write the DLF and remove the accept_queue message 
	$accept_queue[$msgidx]->{to_relay_host} = "localhost";
	$accept_queue[$msgidx]->{stat} = "sent";

	# In principle we could do a better calculation
	$accept_queue[$msgidx]->{delay} =
	  $log->{timestamp} - $accept_queue[$msgidx]->{time};

	print_dlf($accept_queue[$msgidx]);

	# Remove from queue
	splice @accept_queue, 0, $msgidx if $msgidx;
    } else {
	# if the mail is not in accept_queue, we must invent a message to be
	# dumped on the system
	my($msg);
	$msg->{to_relay_host}    = "localhost";
	$msg->{stat}             = "sent";
	$msg->{msgid}            = $$fields[1];
	$msg->{nrcpt}            = $$fields[3];
	$msg->{queueid}          = $$fields[0];
	$msg->{rcpt}             = $$fields[4];
	$msg->{size}             = $$fields[2];
	$msg->{logrelay}         = $log->{hostname};
	$msg->{delay}            = $log->{timestamp} - $start_time;
	$msg->{time}             = $log->{timestamp};
	print_dlf($msg);
    }
}

#-----------------------------------------------------------------------
# Function trace
#
# Output a debug-level message only when $debug is set
sub trace {
    lr_debug( @_ ) if $debug;
}

#-----------------------------------------------------------------------
# Function Debug
#
# A function to help debugging the nms2dlf code.
#
sub Debug {

  if ($debug) {
      my ($QID,$HEDE) = ('','');

      my ($d_hed_index,$d_error_on_msg,$d_error_envid) = ('undef','undef','undef');
      $d_hed_index    = $hed_index     if (defined $hed_index);
      $d_error_on_msg = $error_on_msg  if (defined $error_on_msg);
      $d_error_envid  = $error_envid   if (defined $error_envid);

      my ($d_nb_accept,$d_nb_hed) = ($#accept_queue + 1, $#hed_list + 1);

      for (my $i = 0; $i <= $#accept_queue; $i++) {
	  $QID = $QID . " " . ${$accept_queue[$i]}{'queueid'}
	    if (defined ${$accept_queue[$i]}{'queueid'});
      }

      for (my $i = 0; $i <= $#hed_list; $i++) {
	  $HEDE = $HEDE . " " . ${$hed_list[$i]}{'error_on_msg'}
	    if (defined ${$hed_list[$i]}{'error_on_msg'});
      }

      lr_debug( <<EOT );
    runtime parameters
    ------------------
    hed_index:        $d_hed_index
    error_on_msg:     $d_error_on_msg
    error_envid:      $d_error_envid
    #accept_queue:    $d_nb_accept
    acc. que QID:     $QID
    #hed_list:        $d_nb_hed
    HED error on msg: $HEDE
EOT
  }
}


#=============================================================================
# Here we start the main of the program
#=============================================================================

my $schema = eval { Lire::DlfSchema::load_schema( "email" ) };
lr_err( "failed to load email schema: $@" ) if $@;
$dlf_maker =
  $schema->make_hashref2asciidlf_func( qw/time logrelay queueid msgid
		      from_user from_domain from_relay_host from_relay_ip
		      size delay
		      to_user to_domain to_relay_host
		      stat /);

my $lines	= 0;
$dlflines	= 0;
my $errorlines  = 0;
$error_on_msg   = undef;
$error_envid	= undef;
%error_headers  = ();
$start_time	= 0;
my $end_time	= 0;
my @server_msg  = ();
$debug          = 0;

my $parser = new Lire::Syslog;
init_dlf_converter( "email" );
my $failed_line = undef;
while ( <> ) {
    my $l = $_;
    chomp;
    $lines++;

    # Look for ^M in the log file which fooled the logging system
    if ( /\r$/ ) {
	$failed_line .= $_;
	next;
    } elsif ( defined $failed_line) {
	$_ = $failed_line . $_;
	$failed_line = undef;
    }


    # Let's eliminate the (-8174) case in Netscape logs.
    # This pattern:
    #   (-8174)
    # Happens when there is the line:
    #  ... SSL initialization error: couldn't open certdb /mailserv1fs/netscape/server4/alias/msg-amail1-cert7.db
    # An old Netscape log stupidity! A forgotten \n or a bogus missing 'chomp'
    if (/^\s+\(\-\d+\)$/) {
	push(@server_msg,$l);
	next;
    }

    eval {
	# In the following we are building a reference called $log to a table
	# of the form:
	#    'content'    => 'Error-Handler:...'
	#    'facility'   => "Account"
	#    'hostname'   => 'amail1'
	#    'identifier' => undef
	#    'level'      => "Information"
	#    'pid'        => 23494
	#    'process'    => 'smtpd'
	#    'timestamp'  => 1014159627

	my $log = $parser->parse( $_ );
	die "not a smtpd log line\n" unless $log->{process} eq 'smtpd';
	die "facility and level must be defined\n"
	  unless exists $log->{facility} && exists $log->{level};

	$start_time = $log->{timestamp} unless $start_time;
	$end_time   = $log->{timestamp} if $log->{timestamp} > $end_time;

	# At this stage we are building 3 variables:
	#   Variable         Typical value
	#  ---------------------------------
	#   $module          Error-Handler
	#   @fields          0  'GWW7SV01.L00'
	#                    1  '<root@amail1.iorange.ch>'
	#                    2  841
	#                    3  '<20020529220013.AAA8579+3977504@ama...>'

	my ( $module, @fields ) = split /:/, $log->{content};

	# Happens at the end of the error dump
	$module = "" unless defined $module;

      SWITCH:
	for ( $module ) {
	    /SMTP-Accept/ && do {
		smtp_accept_event( $log, \@fields );
		last SWITCH;
	    };
	    /SMTP-Deliver/ && do {
		smtp_deliver_event( $log, \@fields );
		last SWITCH;
	    };
	    /Denied TCP/ && do {
		smtp_deny_event( $log, join( ":", $module, @fields ) );
		last SWITCH;
	    };
	    /SMTP-ProtocolPlugin/ && do {
		# Skip those messages
		last SWITCH;
	    };

	    /Mailbox-Deliver/ && do {
	      # We are going to flush out the delivered messages
		Mailbox_Deliver(\@fields,$module,$log);
		last SWITCH;
	    };
	    $log->{facility} eq "General" &&
	      $log->{level} eq 'Error' && do {
		if ($log->{content} =~ /SSL/) {
		    push(@server_msg,$l);
		    last SWITCH;
		}
		lr_debug( "*** $l" ) if $debug;
		HED_Event( $log, $module, join( ":", @fields ) );
		last SWITCH;
	    };
	    /Error-Handler/ && do {
		lr_debug( "*** $l" ) if ($debug);

		# We skip if this is the header before the HED is
		# dumped. The useful data are in the HED
		if ($log->{content} =~ /General Notice: Error-Handler/) {
		    last SWITCH;
		}

		# If this is an HED then we process it.
		Handler_Error_Event( $log, \@fields );
		last SWITCH;
	    };
	    $log->{facility} eq "General" &&
	      $log->{level} eq "Information" && do {
		# Skip informational message
		push(@server_msg,$l);
		last SWITCH;
	    };
	    /Client End-Of-Stream|starting queue|ended queue|Processing queue|listening|starting up|got shutdown|shutting down/ && do {
                # Skip
	        push(@server_msg,$l);
                last SWITCH;
            };
	    # Unknown message
	    die "unknown module: $module\n";
	};
    };
    if ($@) {
	lr_warn( $@ );
	lr_warn( "failed to parse line $. '$_'. Skipping." );
	$errorlines++;
    }
}

# At the end of the file we should treat the last errors
# eval { Process_HED( $end_time ) if $error_envid };
# if ($@) { lr_warn( $@ ) };

Print_Server_Messages(\@server_msg);
print_deferred_messages( $end_time );

end_dlf_converter( $lines, $dlflines, $errorlines );

__END__


# Local Variables:
# mode: cperl
# End:
