#!/usr/bin/perl
#============================================================= -*-perl-*-
#
# BackupPC_nightly: Nightly cleanup & statistics script.
#
# DESCRIPTION
#
#   BackupPC_nightly performs several administrative tasks:
#
#      - monthly aging of per-PC log files (only with -m option)
#
#      - updating reference counts for V4+ backups
#
#      - pruning files from pool no longer used (ie: those with only one
#        hard link in V3, or a reference count of zero for V4+)
#
#      - sending email to users and administrators (only with -m option)
#
#   Usage: BackupPC_nightly [-m] [-r] [-p] [-P phase] poolRangeStart poolRangeEnd
#
#   Flags:
#
#     -m   Primary (master) BackupPC_nightly. Sends email.
#          Otherwise, BackupPC_nightly just does pool pruning.
#          Since several BackupPC_nightly processes might run
#          concurrently, just the first one is given the -m flag
#          by BackupPC.
#
#     -r   Don't run BackupPC_refCountUpdate because there are long
#          running BackupPC_dump jobs that were running the last time
#          we ran BackupPC_refCountUpdate.
#
#     -p   don't show progress
#
#     -P phase
#          Phase from 0..15 each time we run BackupPC_nightly.  Used by
#          BackupPC_refCountUpdate to compute exact pool size for portions
#          of the pool based on $Conf{PoolSizeNightlyUpdatePeriod}.
#
#   The poolRangeStart and poolRangeEnd arguments are integers from 0 to 255.
#   These specify which parts of the subtasks or pool to process.
#
#   In the V3 pool, there are 256 2nd-level directories in the pool
#   (0/0, 0/1, ..., f/e, f/f).  BackupPC_nightly processes the given
#   subset of this list (0 means 0/0, 255 means f/f).  Therefore,
#   arguments of 0 255 process the entire pool, 0 127 does the first
#   half (ie: 0/0 through 7/f), 128 255 does the other half (ie: 8/0
#   through f/f) and 0 15 does just the first 1/16 of the pool (ie: 0/0
#   through 0/f).
#
#   In V4, reference count updating is done for each host.  The host
#   list is divided up, and processed based on the 8 bit argument range.
#
#   The V4 pool has 128 first-level directories and 128 second-level
#   directories.  The 0..255 argument is divided by 2 to select which
#   top-level directories to process.  Odd numbers are skipped.
#
# AUTHOR
#   Craig Barratt  <cbarratt@users.sourceforge.net>
#
# COPYRIGHT
#   Copyright (C) 2001-2017  Craig Barratt
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#========================================================================
#
# Version 4.0.0, released 3 Mar 2017.
#
# See http://backuppc.sourceforge.net.
#
#========================================================================

use strict;
no  utf8;
use lib "/usr/share/backuppc/lib";
use BackupPC::Lib qw( :BPC_DT_ALL );
use BackupPC::XS;
use BackupPC::DirOps;
use Getopt::Std;

use File::Path;
use Data::Dumper;

#
# Create the BackupPC library object; the directories and configuration
# used by the rest of the script all come from it.
#
my $bpc = BackupPC::Lib->new();
die("BackupPC::Lib->new failed\n") if ( !$bpc );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();

#
# NOTE(review): these are presumably filled in by eval'ing the server's
# "status hosts" reply below (only with -m) - confirm against the server.
#
my(%Status, %Info, %Jobs, @BgQueue, @UserQueue, @CmdQueue);

#
# We delete unused pool files (link count 1) in sorted inode
# order by gathering batches.  We delete the first half of
# each batch (ie: $PendingDeleteMax / 2 at a time).
#
my @PendingDelete;
my $PendingDeleteMax = 10240;

$bpc->ChildInit();

#
# Parse and validate the command line.  Every bad argument prints a
# message and exits with status 1.
#
my %opts;
if ( !getopts("mprP:", \%opts) || @ARGV != 2 ) {
    print("usage: $0 [-m] [-p] [-r] [-P phase] poolRangeStart poolRangeEnd\n");
    exit(1);
}

#
# The phase is handed on to BackupPC_refCountUpdate, so insist on a plain
# non-negative integer, just like the pool range arguments.
#
if ( defined($opts{P}) && $opts{P} !~ /^\d+\z/ ) {
    print("$0: bad phase '$opts{P}'\n");
    exit(1);
}
if ( $ARGV[0] !~ /^(\d+)$/ || $1 > 255 ) {
    print("$0: bad poolRangeStart '$ARGV[0]'\n");
    exit(1);
}
my $poolRangeStart = $1;
if ( $ARGV[1] !~ /^(\d+)$/ || $1 > 255 ) {
    print("$0: bad poolRangeEnd '$ARGV[1]'\n");
    exit(1);
}
my $poolRangeEnd = $1;

#
# The primary (-m) process asks the server for its host status.
#
if ( $opts{m} ) {
    my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
    if ( $err ) {
        print("Can't connect to server ($err)\n");
        exit(1);
    }
    my $reply = $bpc->ServerMesg("status hosts");
    # Copy the reply through a capture so it is untainted before the eval
    $reply = $1 if ( $reply =~ /(.*)/s );
    #
    # NOTE(review): this is a string eval of text received from the
    # server, so it is only as trustworthy as the server connection.
    # At least report a reply that fails to evaluate instead of
    # silently carrying on.
    #
    eval($reply);
    print("$0: can't parse server status reply: $@\n") if ( $@ );
}

###########################################################################
# V3 pool: get statistics, and remove files that have only one link.
###########################################################################
#
# Per-directory V3 pool statistics.  ScanAndCleanV3Pool() resets these for
# each pool directory and GetPoolStats_V3() accumulates into them.
#
my(
    $fileCnt,          # total number of v3 files
    $dirCnt,           # total number of v3 directories
    $blkCnt,           # total block size of v3 files
    $fileCntRm,        # total number of removed v3 files
    $blkCntRm,         # total block size of removed v3 files
    $fileCntRep,       # total number of v3 file names containing "_",
                       # ie: files that have repeated md5 checksums
    $fileRepMax,       # worst case number of v3 files that have repeated
                       # checksums (ie: max(nnn+1) for all names
                       # xxxxxxxxxxxxxxxx_nnn)
    $fileLinkMax,      # maximum number of hardlinks on a v3 pool file
    $fileLinkTotal,    # total number of hardlinks on entire v3 pool
    $fileCntRename,    # number of renamed v3 files (to keep file
                       # numbering contiguous)
);

#
# List of v3 paths that need to be renamed to avoid new holes
#
my %FixList;

ScanAndCleanV3Pool() if ( $Conf{PoolV3Enabled} );

###########################################################################
# Prior to V3, we needed to tell BackupPC when it was ok to start running
# BackupPC_dump commands. In V3+ they are decoupled, so this message isn't
# actually needed anymore.
###########################################################################
print("BackupPC_nightly lock_off\n");

###########################################################################
# V4 pool: run reference count updating and pool cleaning
# This runs in parallel for each subset of the pool
###########################################################################
if ( $opts{r} ) {
    print("log BackupPC_nightly skipping BackupPC_refCountUpdate\n");
} else {
    $opts{P} ||= 0;

    #
    # Pass the arguments to system() as a list so that no shell is
    # involved: -P comes straight from our command line and must not be
    # re-parsed (word-split or expanded) by a shell.
    #
    my @refCountArgs = ("-m", "-s", "-c", "-P", $opts{P},
                        "-r", "$poolRangeStart-$poolRangeEnd");
    print("log BackupPC_nightly now running BackupPC_refCountUpdate "
                        . join(" ", @refCountArgs) . "\n");
    my $status = system("$BinDir/BackupPC_refCountUpdate", @refCountArgs);

    #
    # A return of -1 means the command could not be started at all
    #
    if ( $status == -1 ) {
        print("log BackupPC_nightly can't run BackupPC_refCountUpdate: $!\n");
    }
}

###########################################################################
# Send email and generation of backupInfo files for each backup
###########################################################################
#
# Only the primary (-m) process runs BackupPC_sendEmail; the progress
# line is suppressed by -p.
#
if ( $opts{m} ) {
    my $sendEmailCmd = "$BinDir/BackupPC_sendEmail";

    print("log BackupPC_nightly now running BackupPC_sendEmail\n");
    unless ( $opts{p} ) {
        print("__bpc_progress_state__ BackupPC_sendEmail\n");
    }
    system($sendEmailCmd);
}

exit(0);

#
# Callback passed as "wanted" to BackupPC::DirOps::find for the V3 pool.
#
#   $file      name of the entry (only its _nnn suffix is examined)
#   $fullPath  path that is lstat'ed and, if unused, queued for removal
#
# Directories are counted in $dirCnt; anything else that is not a plain
# file is ignored.  Plain files with a link count other than 1 are added
# to the pool statistics.  Plain files with exactly one link are counted
# as removed and queued in @PendingDelete.
#
sub GetPoolStats_V3
{
    my($file, $fullPath) = @_;
    my @st = lstat($fullPath);
    my($inode, $nlinks, $nblocks) = @st[1, 3, 12];

    # The "_" handle re-uses the lstat() result from above
    if ( -d _ ) {
        $dirCnt++;
        return;
    }
    return if ( !-f _ );

    if ( $nlinks != 1 ) {
        #
        # File is still referenced: just update the statistics.  A name
        # ending in _nnn is a repeated checksum.
        #
        if ( $file =~ /_(\d+)$/ ) {
            $fileCntRep++;
            $fileRepMax = $1 + 1 if ( $1 >= $fileRepMax );
        }
        $fileCnt++;
        $blkCnt += $nblocks;
        $fileLinkMax = $nlinks if ( $nlinks > $fileLinkMax );
        $fileLinkTotal += $nlinks - 1;
    } else {
        $fileCntRm++;
        $blkCntRm += $nblocks;

        #
        # Queue the file rather than removing it now.  Batching lets us
        # remove files in inode order, and it further shrinks any
        # remaining window for a race between linking to a pool file
        # and removing it.  (Other aspects of the design should
        # eliminate race conditions.)
        #
        my %pending = (inode => $inode, path => $fullPath);
        push(@PendingDelete, \%pending);
        processPendingDeletes(0) if ( @PendingDelete > $PendingDeleteMax );

        #
        # Files sharing a checksum are named xxxx, xxxx_0, xxxx_1, ...
        # and must stay sequentially numbered.  Whether we removed the
        # base file xxxx or some xxxx_nnn, remember the base name so the
        # chain can be renumbered later (not in the middle of find).
        #
        (my $baseName = $fullPath) =~ s/_\d+$//;
        $FixList{$baseName}++;
    }
}

#
# Remove queued pool files from @PendingDelete in inode order.
#
#   $doAll  true:  take every queued entry
#           false: take only the first $PendingDeleteMax / 2 entries
#
# Each file is lstat'ed again just before removal and is only unlinked
# if its link count is still exactly 1.
#
sub processPendingDeletes
{
    my($doAll) = @_;

    my $takeCnt = $doAll ? scalar(@PendingDelete) : $PendingDeleteMax / 2;
    my @batch   = splice(@PendingDelete, 0, $takeCnt);
    my @byInode = sort({ $a->{inode} <=> $b->{inode} } @batch);

    foreach my $entry ( @byInode ) {
        my @st = lstat($entry->{path});

        # print("Deleting $entry->{path} ($entry->{inode})\n");
        unlink($entry->{path}) if ( $st[3] == 1 );
    }
}

#
# Scan the V3 pool and cpool directories numbered $poolRangeStart through
# $poolRangeEnd (0 is 0/0, 255 is f/f).  For each directory:
#
#   - reset the statistics and walk the directory with GetPoolStats_V3,
#     which gathers the statistics and queues single-link files
#   - remove all the queued files
#   - renumber repeated-checksum files (xxxx, xxxx_0, xxxx_1, ...) so
#     their numbering stays contiguous after the removals
#   - print a "BackupPC_stats" line with the results
#
# Takes no arguments and returns nothing useful.  (Declared without a
# prototype: the sub is called before it is defined, so a prototype
# would never be applied anyway.)
#
sub ScanAndCleanV3Pool
{
    my @hexChars = qw(0 1 2 3 4 5 6 7 8 9 a b c d e f);

    for my $pool ( qw(pool cpool) ) {
        print("__bpc_progress_state__ v3 $pool scan\n") if ( !$opts{p} );
        for my $i ( $poolRangeStart .. $poolRangeEnd ) {
            my $topChar = $hexChars[int($i / 16)];
            my $dir     = "$topChar/$hexChars[$i % 16]";
            # print("Doing $pool/$dir\n") if ( ($i % 16) == 0 );
            $fileCnt       = 0;
            $dirCnt        = 0;
            $blkCnt        = 0;
            $fileCntRm     = 0;
            $blkCntRm      = 0;
            $fileCntRep    = 0;
            $fileRepMax    = 0;
            $fileLinkMax   = 0;
            $fileLinkTotal = 0;
            $fileCntRename = 0;
            %FixList       = ();
            print("__bpc_progress_fileCnt__ $i/$poolRangeEnd\n") if ( !$opts{p} );
            BackupPC::DirOps::find($bpc, {wanted => \&GetPoolStats_V3}, "$TopDir/$pool/$dir")
                                                if ( -d "$TopDir/$pool/$dir" );

            # The block counts are halved to give the kb figures reported below
            my $kb   = $blkCnt / 2;
            my $kbRm = $blkCntRm / 2;

            #
            # Main BackupPC_nightly counts the top-level directory
            #
            $dirCnt++ if ( $opts{m} && -d "$TopDir/$pool" && $i == 0 );

            #
            # Also count the next level directories
            #
            $dirCnt++ if ( ($i % 16) == 0 && -d "$TopDir/$pool/$topChar" );

            #
            # We need to process all pending deletes before we do the
            # renames
            #
            if ( @PendingDelete ) {
                sleep(1);
                processPendingDeletes(1);
            }

            #
            # Now make sure that files with repeated checksums are still
            # sequentially numbered.  $old walks the existing names
            # (-1 is the base name without a suffix) and $new is the next
            # free slot in the compacted numbering.
            #
            foreach my $name ( sort(keys(%FixList)) ) {
                my $rmCnt = $FixList{$name} + 1;
                my $new = -1;
                for ( my $old = -1 ; ; $old++ ) {
                    my $oldName = $name;
                    $oldName .= "_$old" if ( $old >= 0 );
                    if ( !-f $oldName ) {
                        #
                        # We know we are done when we have missed at least
                        # the number of files that were removed from this
                        # base name, plus a couple just to be sure
                        #
                        last if ( $rmCnt-- <= 0 );
                        next;
                    }
                    my $newName = $name;
                    $newName .= "_$new" if ( $new >= 0 );
                    $new++;
                    next if ( $oldName eq $newName );
                    #
                    # Only count renames that actually happened
                    #
                    $fileCntRename++ if ( rename($oldName, $newName) );
                }
            }
            print("BackupPC_stats $i = $pool,$fileCnt,$dirCnt,$kb,$kbRm,"
                                  . "$fileCntRm,$fileCntRep,$fileRepMax,"
                                  . "$fileCntRename,$fileLinkMax,$fileLinkTotal\n");
        }
    }

    #
    # Every directory pass above already flushes its pending deletes, so
    # only pause and flush again if something is unexpectedly left over.
    #
    if ( @PendingDelete ) {
        sleep(1);
        processPendingDeletes(1);
    }
}
