#! /usr/bin/perl -w

# vim:syntax=perl

use strict;
use lib '/usr/share/perl5';

use Lire::DlfSchema;
use Lire::Syslog;
use Lire::Program qw/:msg :dlf/;

use vars qw/ $dlf_maker $dlflines $debug /;

# Emit one DLF record on STDOUT and count it.
#
# Takes a single argument, the record to write (the caller in this file
# passes a hash reference).  The record is handed to the $dlf_maker code
# reference, whose result is dereferenced as an array; its elements are
# printed space-separated on one line.  The global $dlflines counter is
# bumped once per call.
sub print_dlf {
    my ($record) = @_;

    my $fields = $dlf_maker->($record);
    print join( " ", @{$fields} ), "\n";
    $dlflines++;
}

# Load the "spamfilter" DLF schema.  A failure inside load_schema() is
# trapped by the eval and reported through lr_err().
my $schema = eval { Lire::DlfSchema::load_schema( "spamfilter" ) };
lr_err( "failed to load spamfilter schema: $@" ) if $@;

# Code reference used by print_dlf() to turn a record into its DLF fields.
# The field names are passed in the order listed here.
$dlf_maker =
  $schema->make_hashref2asciidlf_func( qw/time localserver host_originating ip_originating msgid user msgsize_in msgsize_out time_elapsed spam_score spam_result/);

# Counters handed to end_dlf_converter() once all input has been read.
my $lines	= 0;
$dlflines	= 0;
my $errorlines  = 0;

# NOTE(review): $debug is not read anywhere in this file; it is kept because
# it is a package global that other code could consult -- confirm.
$debug          = 0;

# A single syslog parser is shared by every input line.
my $syslog_parser = Lire::Syslog->new;

init_dlf_converter( "spamfilter" );

# Per-pid state gathered from a "processing/checking message" line and
# consumed by the matching result line.
my %data = ();
# Main conversion loop: read syslog lines from the files named on the
# command line (or STDIN), pair each spamd "processing/checking message"
# line with the result line logged by the same pid, and print one DLF
# record per completed pair.
while ( <> ) {
    chomp;
    $lines++;

    my $rec = eval { $syslog_parser->parse( $_ ) };
    if ($@) {
	lr_warn( "line $. is an invalid syslog message: $@" );
	$errorlines++;
	# Nothing usable was parsed: skip the line explicitly instead of
	# relying on the undefined $rec being autovivified by the test below.
	next;
    }

    # Only entries logged by a process whose name starts with "spamd" matter.
    next unless defined $rec->{process} && $rec->{process} =~/^spamd/;

    my $pid	= $rec->{pid};
    my $line	= $rec->{content};

    # Start of a scan: remember message id, user and (optional) input size
    # under the pid that logged it.
    if ($line=~/(processing|checking) message \<(.+?)\> for (.+?):\d+(, expecting (\d+) bytes)?/) {
	$data{$pid}{msgid}	= $2;
	$data{$pid}{user}	= $3;
	# The "expecting N bytes" part is optional; default the size to 0.
	$data{$pid}{msgsize_in}	= $5 || 0;
	next;
    }

    # End of a scan: verdict, score, elapsed time and output size.
    if ($line=~/(clean message|identified spam) \((-?[\d.]+)\/[\d.]+\) for .+?:\d+ in +([\d.]+) seconds, (\d+) bytes./) {
	my %dlf=();

	# Copy the captures first so later statements cannot disturb them.
	$dlf{spam_result}	= ($1 eq "identified spam") ? 1 : 0;
	$dlf{spam_score}	= $2;
	$dlf{time_elapsed}	= $3;
	$dlf{msgsize_out}	= $4;

	# Take the pending state out of %data.  delete() removes the key, so
	# finished pids do not accumulate, and it does not create an entry
	# for a pid that was never seen.
	my $pending = delete $data{$pid};

	# ignore leftovers from previous logfiles
	next unless defined $pending && defined $pending->{msgid};

	$dlf{msgid}		= $pending->{msgid};
	$dlf{user}		= $pending->{user};
	$dlf{msgsize_in}	= $pending->{msgsize_in};
	$dlf{time}		= $rec->{timestamp};
	$dlf{localserver}	= $rec->{hostname};

	print_dlf(\%dlf);
	next;
    }
}

# Report the totals: lines read, DLF records written, unparsable lines.
end_dlf_converter( $lines, $dlflines, $errorlines );

__END__


=pod

=head1 NAME

spamassassin2dlf - convert SpamAssassin logs into Lire spamfilter DLF format

=head1 SYNOPSIS

B<spamassassin2dlf>

=head1 DESCRIPTION

This program converts SpamAssassin's ( http://spamassassin.org/ )
spamd syslog files to the Lire spamfilter DLF.

=head1 LIMITATIONS

The originating host isn't used (yet) because there is no way to link the
receiving process and its children.

=head1 EXAMPLES

To process a log as produced by SpamAssassin:

 $ spamassassin2dlf < spamd-log

spamassassin2dlf will be rarely used on its own, but is more likely
called by lr_log2report:

 $ lr_log2report spamassassin < /var/log/spamd-log

=head1 BUGS

This manpage should feature an example SpamAssassin logfile snippet.

=head1 SEE ALSO

http://spamassassin.org

=head1 VERSION

$Id: spamassassin2dlf.in,v 1.8 2008/03/25 11:00:04 wraay Exp $

=head1 AUTHORS

Edwin Groothuis <edwin@mavetju.org>

=head1 COPYRIGHT

Copyright (C) 2002 Edwin Groothuis <edwin@mavetju.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. The name of the copyright holder may not be used to endorse or promote
products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.

=cut

