# Copyright (C) 2001,2004 Stichting LogReport Foundation logreport@logreport.org

# This file is part of Lire.

# Lire is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program (see COPYING); if not, check with
# http://www.gnu.org/copyleft/gpl.html.

# Authors:
#   E.L. Willighagen <egonw@logreport.org>
#   Francis J. Lacoste <flacoste@logreport.org>

package Lire::DlfAnalysers::DomainCategoriser;

use strict;

use base qw/Lire::DlfCategoriser/;

use Lire::WWW::Domain;

# Constructor. Takes the class name as its first argument and returns a
# new object backed by an empty hash, blessed into that class.
sub new {
    my $class = shift;
    my $self  = {};

    return bless $self, $class;
}

# Returns the identifier of this categoriser: the string "www-domain".
sub name {
    my $identifier = 'www-domain';

    return $identifier;
}

# Returns the human-readable title of this categoriser.
sub title {
    my $label = 'Domain DlfAnalyser';

    return $label;
}

# Returns the description of this categoriser as a single <para> element
# of marked-up text. The text names the fields the categoriser fills in
# (country, client_domain) and the field it reads (client_host).
sub description {
    my $markup = '<para>This categoriser extracts the '
      . '<structfield>country</structfield> and '
      . '<structfield>client_domain</structfield> fields based on the '
      . 'information contained in the '
      . '<structfield>client_host</structfield> field.</para>';

    return $markup;
}

# Returns the name of the schema this categoriser reads from: "www".
sub src_schema {
    my $schema = 'www';

    return $schema;
}

# Returns the name of the schema this categoriser writes to: "www-domain".
sub dst_schema {
    my $schema = 'www-domain';

    return $schema;
}

# Prepares the categoriser for use by creating the Lire::WWW::Domain object
# that categorise() relies on and storing it in $self->{'analyser'}.
#
# Parameters:
#   $self   - the categoriser object (hash based).
#   $config - accepted as the second argument but not used here.
#
# The analyser is created with 'country_default' set to 'LIRE_UNDEF';
# categorise() maps that value back to undef.
#
# Returns nothing.
sub initialise {
    my ( $self, $config ) = @_;

    # Use the explicit Class->method form. The indirect object form
    # ("new Class(...)") depends on parser heuristics, and this package
    # defines its own new(), which makes that form fragile.
    $self->{'analyser'} =
      Lire::WWW::Domain->new( 'country_default' => 'LIRE_UNDEF' );

    return;
}

# Fills in the client_domain and country fields of a DLF record from its
# client_host field.
#
# Parameters:
#   $self - the categoriser; $self->{'analyser'} must already hold the
#           object created by initialise().
#   $dlf  - hash reference for the record. 'client_host' is read and the
#           record is updated in place.
#
# Behaviour by kind of client_host value:
#   - undefined: the record is left untouched and the analyser is not
#     called.
#   - dotted-quad IPv4 address: client_domain is set to the first three
#     octets; country is not touched.
#   - host name: client_domain is set to the trailing part of the name
#     (see the patterns below) and country to the analyser's getCountry()
#     result, where 'LIRE_UNDEF' is stored as undef.
#   - anything else: neither field is touched.
#
# Returns nothing.
sub categorise {
    my ( $self, $dlf ) = @_;

    my $host = $dlf->{'client_host'};
    return unless defined $host;

    my $analyser = $self->{'analyser'};
    $analyser->setDomain( $host );

    if ( $host =~ /^(\d+\.\d+\.\d+)\.\d+$/ ) {
        $dlf->{'client_domain'} = $1;
    }
    elsif (
        # Generic TLD: keep "label.tld". Three or more letters so that
        # TLDs such as .info or .museum are recognised as well as .com.
        $host =~ /([-a-zA-Z0-9]+\.[a-z]{3,})$/

        # Country-code TLD below at least two labels: keep the last
        # three labels, e.g. "example.co.uk".
        || $host =~ /([-a-zA-Z0-9]+\.[-a-zA-Z0-9]+\.[a-z]{2})$/

        # Bare "label.cc" host, which has too few labels for the
        # pattern above.
        || $host =~ /^([-a-zA-Z0-9]+\.[a-z]{2})$/

        # Unqualified host name.
        || $host =~ /^([-a-zA-Z0-9]+)$/
      )
    {
        $dlf->{'client_domain'} = $1;

        # 'LIRE_UNDEF' is the placeholder configured in initialise() for
        # an unknown country. The defined() guard keeps the string
        # comparison from operating on an undefined value.
        my $country = $analyser->getCountry();
        undef $country
          if defined $country && $country eq 'LIRE_UNDEF';
        $dlf->{'country'} = $country;
    }

    return;
}

# A module file must end with a true value so that use/require succeeds.
1;
