#! /usr/bin/perl -w
use lib '/usr/lib/perl'; use INN::Config;

##############################################################################
# perl-nocem - a NoCeM-on-spool implementation for INN 2.x.
# Copyright 2000 by Miquel van Smoorenburg <miquels@cistron.nl>
# Copyright 2001 by Marco d'Itri <md@linux.it>
# This program is licensed under the terms of the GNU General Public License.
#
# List of changes:
#
# 2002: Patch by Steven M. Christey for untrusted printf input.
# 2007: Patch by Christoph Biedl for checking a timeout.
# Documentation improved by Jeffrey M. Vinocur (2002), Russ Allbery (2006)
# and Julien Elie (2007).
#
##############################################################################

require 5.00403;
use strict;

# XXX FIXME I haven't been able to load it only when installed.
# If nobody can't fix it just ship the program with this line commented.
#use Time::HiRes qw(time);

my $keyring = $INN::Config::pathetc . '/pgp/ncmring.gpg';

# XXX To be moved to a config file.
#sub local_want_cancel_id {
#    my ($group, $hdrs) = @_;
#
## Hippo has too many false positives to be useful outside of pr0n groups
#    if ($hdrs->{issuer} =~ /(?:Ultra|Spam)Hippo/) {
#        foreach (split(/,/, $group)) {
#            return 1 if /^alt\.(?:binar|sex)/;
#        }
#        return 0;
#    }
#    return 1;
#}

# no user serviceable parts below this line ###################################

# global variables
my ($working, $got_sighup, $got_sigterm, @ncmperm, $cancel);
my $use_syslog = 0;
my $log_open = 0;
my $nntp_open = 0;
my $last_cancel = 0;
my $socket_timeout = $INN::Config::peertimeout - 100;

my $logfile = $INN::Config::pathlog . '/perl-nocem.log';

# initialization and main loop ###############################################

eval { require Sys::Syslog; import Sys::Syslog; $use_syslog = 1; };

if ($use_syslog) {
    if ($Sys::Syslog::VERSION < 0.15) {
        eval "sub Sys::Syslog::_PATH_LOG { '/dev/log' }" if $^O eq 'dec_osf';
        Sys::Syslog::setlogsock('unix') if $^O =~ /linux|dec_osf|freebsd|darwin/;
    }
    openlog('nocem', '', $INN::Config::syslog_facility);
}

if (not $INN::Config::gpgv) {
    logmsg('cannot find the gpgv binary', 'err');
    sleep 5;
    exit 1;
}

if ($INN::Config::version and not $INN::Config::version =~ /^INN 2\.[0123]\./) {
    $cancel = \&cancel_nntp;
} else {
    $cancel = \&cancel_ctlinnd;
}

$SIG{HUP}  = \&hup_handler;
$SIG{INT}  = \&term_handler;
$SIG{TERM} = \&term_handler;
$SIG{PIPE} = \&term_handler;

logmsg('starting up');

unless (read_ctlfile()) {
    sleep 5;
    exit 1;
}

while (<STDIN>) {
    chop;
    $working = 1;
    do_nocem($_);
    $working = 0;
    term_handler() if $got_sigterm;
    hup_handler() if $got_sighup;
}

logmsg('exiting because of EOF', 'debug');
exit 0;

##############################################################################

# Process one NoCeM notice.
sub do_nocem {
    my $token = shift;
    my $start = time;

    # open the article and verify the notice
    my $artfh = open_article($token);
    return if not defined $artfh;
    my ($msgid, $nid, $issuer, $nocems) = read_nocem($artfh);
    close $artfh;
    return unless $nocems;

    &$cancel($nocems);
    logmsg("Articles cancelled: " . join(' ', @$nocems), 'debug');
    my $diff = (time - $start) || 0.01;
    my $nr = scalar @$nocems;
    logmsg(sprintf("processed notice %s by %s (%d ids, %.5f s, %.1f/s)",
        $nid, $issuer, $nr, $diff, $nr / $diff));
}

# - Check if it is a PGP signed NoCeM notice
# - See if we want it
# - Then check PGP signature
sub read_nocem {
    my $artfh = shift;

    # Examine the first 200 lines to see if it is a PGP signed NoCeM.
    my $ispgp = 0;
    my $isncm = 0;
    my $inhdr = 1;
    my $i = 0;
    my $body = '';
    my $from = '';
    my $msgid = '<>';
    while (<$artfh>) {
        last if $i++ > 200;
        s/\r\n$/\n/;
        if ($inhdr) {
            if (/^$/) {
                $inhdr = 0;
            } elsif (/^From:\s+(.*)\s*$/i) {
                $from =  $1;
            } elsif (/^Message-ID:\s+(<.*>)/i) {
                $msgid = $1;
            }
        } else {
            $body .= $_;
            $ispgp = 1 if /^-----BEGIN PGP SIGNED MESSAGE-----/;
            if (/^\@\@BEGIN NCM HEADERS/) {
                $isncm = 1;
                last;
            }
        }
    }

    # must be a PGP signed NoCeM.
    if (not $ispgp) {
        logmsg("Article $msgid: not PGP signed", 'debug');
        return;
    }
    if (not $isncm) {
        logmsg("Article $msgid: not a NoCeM", 'debug');
        return;
    }

    # read the headers of this NoCeM, and check if it's supported.
    my %hdrs;
    while (<$artfh>) {
        s/\r\n/\n/;
        $body .= $_;
        last if /^\@\@BEGIN NCM BODY/;
        my ($key, $val) = /^([^:]+)\s*:\s*(.*)$/;
        $hdrs{lc $key} = $val;
    }
    foreach (qw(action issuer notice-id type version)) {
        next if $hdrs{$_};
        logmsg("Article $msgid: missing $_ pseudo header", 'debug');
        return;
    }
    return if not supported_nocem($msgid, \%hdrs);

    # decide if we want it.
    if (not want_nocem(\%hdrs)) {
        logmsg("Article $msgid: unwanted ($hdrs{issuer}/$hdrs{type})", 'debug');
        return;
    }
# XXX want_hier() not implemented
#    if ($hdrs{hierarchies} and not want_hier($hdrs{hierarchies})) {
#        logmsg("Article $msgid: unwanted hierarchy ($hdrs{hierarchies})",
#            'debug');
#        return;
#    }

    # We do want it, so read the entire article.  Also copy it to
    # a temp file so that we can check the PGP signature when done.
    my $tmpfile = "$INN::Config::pathtmp/nocem.$$";
    if (not open(OFD, ">$tmpfile")) {
        logmsg("cannot open temp file $tmpfile: $!", 'err');
        return;
    }
    print OFD $body;
    undef $body;

    # process NoCeM body.
    my $inbody = 1;
    my @nocems;
    my ($lastid, $lastgrp);
    while (<$artfh>) {
        s/\r\n$/\n/;
        print OFD;
        $inbody = 0 if /^\@\@END NCM BODY/;
        next if not $inbody or /^#/;

        my ($id, $grp) = /^(\S*)\s+(\S+)/;
        next if not $grp;
        if ($id) {
            push @nocems, $lastid
                if $lastid and want_cancel_id($lastgrp, \%hdrs);
            $lastid = $id;
            $lastgrp = $grp;
        } else {
            $lastgrp .= ',' . $grp;
        }
    }
    push @nocems, $lastid if $lastid and want_cancel_id($lastgrp, \%hdrs);
    close OFD;

    # at this point we need to verify the PGP signature.
    return if not @nocems;
    my $e = pgp_check($hdrs{issuer}, $msgid, $tmpfile);
    unlink $tmpfile;
    return if not $e;

    return ($msgid, $hdrs{'notice-id'}, $hdrs{issuer}, \@nocems);
}

# XXX not implemented: code to discard notices for groups we don't carry
sub want_cancel_id {
    my ($group, $hdrs) = @_;

    return local_want_cancel_id(@_) if defined &local_want_cancel_id;
    1;
}

# Do we actually want this NoCeM?
sub want_nocem {
    my $hdrs = shift;

    foreach (@ncmperm) {
        my ($issuer, $type) = split(/\001/);
        if ($hdrs->{issuer} =~ /$issuer/i) {
            return 1 if '*' eq $type or lc $hdrs->{type} eq $type;
        }
    }
    return 0;
}

sub supported_nocem {
    my ($msgid, $hdrs) = @_;

    if ($hdrs->{version} !~ /^0\.9[0-9]?$/) {
        logmsg("Article $msgid: version $hdrs->{version} not supported",
            'debug');
        return 0;
    }
    if ($hdrs->{action} ne 'hide') {
        logmsg("Article $msgid: action $hdrs->{action} not supported",
            'debug');
        return 0;
    }
    return 1;
}

# Check the PGP signature on an article.
sub pgp_check {
    my ($issuer, $msgid, $art) = @_;

    # fork and spawn a child
    my $pid = open(PFD, '-|');
    if (not defined $pid) {
        logmsg("pgp_check: cannot fork: $!", 'err');
        return 0;
    }
    if ($pid == 0) {
        open(STDERR, '>&STDOUT');
        exec($INN::Config::gpgv, '--status-fd=1',
            $keyring ? '--keyring=' . $keyring : '', $art);
        exit 126;
    }

    # Read the result and check status code.
    local $_ = join('', <PFD>);
    my $status = 0;
    if (not close PFD) {
        if ($? >> 8) {
            $status = $? >> 8;
        } else {
            logmsg("Article $msgid: $INN::Config::gpgv killed by signal " . ($? & 255));
            return 0;
        }
    }
#    logmsg("Command line was: $INN::Config::gpgv --status-fd=1"
#         . ($keyring ? ' --keyring=' . $keyring : '') . " $art", 'debug');
#    logmsg("Full PGP output: >>>$_<<<", 'debug');

    if (/^\[GNUPG:\]\s+GOODSIG\s+\S+\s+(.*)/m) {
        return 1 if $1 =~ /\Q$issuer\E/;
        logmsg("Article $msgid: signed by $1 instead of $issuer");
    } elsif (/^\[GNUPG:\]\s+NO_PUBKEY\s+(\S+)/m) {
        logmsg("Article $msgid: $issuer (ID $1) not in keyring");
    } elsif (/^\[GNUPG:\]\s+BADSIG\s+\S+\s+(.*)/m) {
        logmsg("Article $msgid: bad signature from $1");
    } elsif (/^\[GNUPG:\]\s+BADARMOR/m or /^\[GNUPG:\]\s+UNEXPECTED/m) {
        logmsg("Article $msgid: malformed signature");
    } elsif (/^\[GNUPG:\]\s+ERRSIG\s+(\S+)/m) {
        # safety net: we get there if we don't know about some token
        logmsg("Article $msgid: unknown error (ID $1)");
    } else {
        # some other error we don't know about happened.
        # 126 is returned by the child if exec fails.
        s/ at \S+ line \d+\.\n$//; s/\n/_/;
        logmsg("Article $msgid: $INN::Config::gpgv exited "
            . (($status == 126) ? "($_)" : "with status $status"), 'err');
    }
    return 0;
}

# Read article.
sub open_article {
    my $token = shift;

    if ($token =~ /^\@.+\@$/) {
        my $pid = open(ART, '-|');
        if ($pid < 0) {
            logmsg('Cannot fork: ' . $!, 'err');
            return undef;
        }
        if ($pid == 0) {
            exec("$INN::Config::newsbin/sm", '-q', $token) or
                logmsg("Cannot exec sm: $!", 'err');
            return undef;
        }
        return *ART;
    } else {
        return *ART if open(ART, $token);
        logmsg("Cannot open article $token: $!", 'err');
    }
    return undef;
}

# Cancel a number of Message-IDs.  We use ctlinnd to do this,
# and we run up to 15 of them at the same time (10 usually).
sub cancel_ctlinnd {
    my @ids = @{$_[0]};

    while (@ids > 0) {
        my $max = @ids <= 15 ? @ids : 10;
        for (my $i = 1; $i <= $max; $i++) {
            my $msgid = shift @ids;
            my $pid;
            sleep 5 until (defined ($pid = fork));
            if ($pid == 0) {
                exec "$INN::Config::pathbin/ctlinnd", '-s', '-t', '180',
                    'cancel', $msgid;
                exit 126;
            }
#            logmsg("cancelled: $msgid [$i/$max]", 'debug');
        }
        #    Now wait for all children.
        while ((my $pid = wait) > 0) {
            next unless $?;
            if ($? >> 8) {
                logmsg("Child $pid died with status " . ($? >> 8), 'err');
            } else {
                logmsg("Child $pid killed by signal " . ($? & 255), 'err');
            }
        }
    }
}

sub cancel_nntp {
    my $ids = shift;
    my $r;

    if ($nntp_open and time - $socket_timeout > $last_cancel) {
        logmsg('Close socket for timeout');
        close (NNTP);
        $nntp_open = 0;
    }
    if (not $nntp_open) {
        use Socket;
        if (not socket(NNTP, PF_UNIX, SOCK_STREAM, 0)) {
            logmsg("socket: $!", 'err');
            goto ERR;
        }
        if (not connect(NNTP, sockaddr_un($INN::Config::pathrun . '/nntpin'))) {
            logmsg("connect: $!", 'err');
            goto ERR;
        }
        if (($r = <NNTP>) !~ /^200 /) {
            $r =~ s/\r\n$//;
            logmsg("bad reply from server: $r", 'err');
            goto ERR;
        }
        select NNTP; $| = 1; select STDOUT;
        print NNTP "MODE CANCEL\r\n";
        if (($r = <NNTP>) !~ /^284 /) {
            $r =~ s/\r\n$//;
            logmsg("MODE CANCEL not supported: $r", 'err');
            goto ERR;
        }
        $nntp_open = 1;
    }
    foreach (@$ids) {
        print NNTP "$_\r\n";
        if (($r = <NNTP>) !~ /^289/) {
            $r =~ s/\r\n$//;
            logmsg("cannot cancel $_: $r", 'err');
            goto ERR;
        }
    }
    $last_cancel = time;
    return;

ERR:
    # discard unusable socket
    close (NNTP);
    logmsg('Switching to ctlinnd...', 'err');
    cancel_ctlinnd($ids);
    $cancel = \&cancel_ctlinnd;
}

sub read_ctlfile {
    my $permfile = $INN::Config::pathetc . '/nocem.ctl';

    unless (open(CTLFILE, $permfile)) {
        logmsg("Cannot open $permfile: $!", 'err');
        return 0;
    }
    while (<CTLFILE>) {
        chop;
        s/^\s+//;
        s/\s+$//;
        next if /^#/ or /^$/;
        my ($issuer, $type) = split(/:/, lc $_);
        if (not (defined($issuer) and defined($type))) {
            logmsg("Cannot parse nocem.ctl line <<$_>>; syntax is <<issuer:type>>.", 'err');
            next;
        }
        $type =~ s/\s//g;
        foreach (split(/,/, $type)) {
            push(@ncmperm, "$issuer\001$_");
        }
    }
    close CTLFILE;
    return 1;
}

sub logmsg {
    my ($msg, $lvl) = @_;

    if (not $use_syslog) {
        if ($log_open == 0) {
            open(LOG, ">>$logfile") or die "Cannot open log: $!";
            $log_open = 1;
            select LOG; $| = 1; select STDOUT;
        }
        $lvl ||= 'notice';
        print LOG "$lvl: $msg\n";
        return;
    }
    syslog($lvl || 'notice', '%s', $msg);
}

sub hup_handler {
    $got_sighup = 1;
    return if $working;
    close LOG;
    $log_open = 0;
}

sub term_handler {
    $got_sigterm = 1;
    return if $working;
    logmsg('exiting because of signal');
    exit 1;
}

__END__

=head1 NAME

perl-nocem - A NoCeM-on-spool implementation for S<INN 2.x>

=head1 SYNOPSIS

perl-nocem

=head1 DESCRIPTION

NoCeM, which is pronounced I<No See 'Em>, is a protocol enabling
authenticated third-parties to issue notices which can be used
to cancel unwanted articles (like spam and articles in moderated
newsgroups which were not approved by their moderators).  It can
also be used by readers as a I<third-party killfile>.  It is
intended to eventually replace the protocol for third-party cancel
messages.

B<perl-nocem> processes third-party, PGP-signed article cancellation
notices.  It is possible not to honour all NoCeM notices but only those
which are sent by people whom you trust (that is to say if you trust
the PGP key they use to sign their NoCeM notices).  Indeed, it is up
to you to decide whether you wish to honour their notices, depending
on the criteria they use.

Processing NoCeM notices is easy to set up:

=over 4

=item 1.

Import the keys of the NoCeM issuers you trust in order to check
the authenticity of their notices.  You can do:

    gpg --no-default-keyring --primary-keyring <pathetc>/pgp/ncmring.gpg --import <key-file>
    chmod 644 <pathetc>/pgp/ncmring.gpg

where <pathetc> is the value of the I<pathetc> parameter set in
F<inn.conf> and <key-file> the file containing the key(s) to import.
The keyring must be located in <pathetc>/pgp/ncmring.gpg; you only
have to create the directory <pathetc>/pgp before using B<gpg> (it will
automatically generate the F<ncmring.gpg> file) and make sure the news
user can read this file, once generated.

For old PGP-generated keys, you may have to use
B<--allow-non-selfsigned-uid> if they are not properly self-signed,
but anyone creating a key really should self-sign the key.  Current PGP
implementations do this automatically.

The keys of NoCeM issuers can be found in the web site of I<The NoCeM Registry>:
L<http://rosalind.home.xs4all.nl/nocemreg/nocemreg.html>.  You can even
download there a unique file which contains all the keys.

=item 2.

Create a F<nocem.ctl> config file in I<pathetc> indicating the NoCeM issuers
and notices you want to follow.  This permission file contains lines like:

    bleachbot@httrack.com:spam,site
    pgpmoose@killfile.org:pgpmoose-forged-moderation

This will remove all articles for which the issuer (first part of the line,
before the colon C<:>) has issued NoCeM notices corresponding to the
criteria specified after the colon.

You will also find information about that on the web site of
I<The NoCeM Registry>.

=item 3.

Add to the F<newsfeeds> file an entry like this one in order to feed
B<perl-nocem> the NoCeM notices posted to alt.nocem.misc and
news.lists.filters:

    nocem!\
        :!*,alt.nocem.misc,news.lists.filters\
        :Tc,Wf,Ap:<pathbin>/perl-nocem

with the correct path to B<perl-nocem>, located in <pathbin>.  Then, reload
the F<newsfeeds> file (C<ctlinnd reload newsfeeds 'NoCeM channel feed'>).

Note that you should at least carry news.lists.filters on your news
server (or other newsgroups where NoCeM notices are sent) if you wish
to process them.

=item 4.

Everything should now work.  However, do not hesitate to manually test
B<perl-nocem> with a NoCeM notice, using:

    grephistory '<Message-ID>' | perl-nocem

Indeed, B<perl-nocem> expects tokens on its standard input, and
B<grephistory> can easily give it the token of a known article,
thanks to its Message-ID.

=back

When you have verified that everything works, you can eventually turn
off regular spam cancels, if you want, not processing any longer
cancels containing C<cyberspam> in the Path: header (see the
I<refusecybercancels> parameter in F<inn.conf>).

=head1 FILES

=over 4

=item I<pathbin>/perl-nocem

The Perl script itself used to process NoCeM notices.

=item I<pathetc>/nocem.ctl

The configuration file which specifies the NoCeM notices to be processed.

=item I<pathetc>/pgp/ncmring.gpg

The keyring which contains the public keys of trusted NoCeM issuers.

=back

=head1 BUGS

The Subject: header is not checked for the @@NCM string and there is no
check for the presence of the References: header.

The Newsgroups: pseudo header is not checked, but this can be done in
local_want_cancel_id().

The Hierarchies: header is ignored.

=head1 HISTORY

Copyright 2000 by Miquel van Smoorenburg <miquels@cistron.nl>.

Copyright 2001 by Marco d'Itri <md@linux.it>.

$Id: perl-nocem.in 9639 2014-05-17 06:24:44Z iulius $

=head1 SEE ALSO

gpgv(1), grephistory(1), inn.conf(5), newsfeeds(5), pgp(1).

=cut
