#!/usr/bin/perl
#============================================================= -*-perl-*-
#
# BackupPC_backupDuplicate: Make a copy of the most recent backup
#
# DESCRIPTION
#
#   BackupPC_backupDuplicate: make a copy of the most recent backup.
#   A V4 backup is copied as-is; a V3 backup is converted into a
#   filled V4 backup.
#
#   Usage:
#       BackupPC_backupDuplicate -h host [-p]
#
# AUTHOR
#   Craig Barratt  <cbarratt@users.sourceforge.net>
#
# COPYRIGHT
#   Copyright (C) 2001-2018  Craig Barratt
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#========================================================================
#
# Version 4.2.1, released 7 May 2018.
#
# See http://backuppc.sourceforge.net.
#
#========================================================================

use strict;
no  utf8;
use lib "/usr/share/backuppc/lib";
use Getopt::Std;
use File::Copy;
use File::Path;
use Data::Dumper;
use Digest::MD5;

use BackupPC::Lib;
use BackupPC::XS qw( :all );
use BackupPC::DirOps;
use BackupPC::View;

#
# Global counters shared with the copy callbacks further down in this file.
#
my $ErrorCnt       = 0;     # errors seen; any non-zero value makes the script exit with status 1
my $FileErrorCnt   = 0;     # files that could not be opened/read (reported at the end only)
my $FileCnt        = 0;     # progress counter: directories for a V4 copy, non-directory entries for V3
my $FileCntNext    = 100;   # value of $FileCnt at which the next progress line is printed

#
# Create the BackupPC library object and fetch the basic paths, the
# configuration and the host table from it.
#
die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();
my $Hosts  = $bpc->HostInfoRead();

my %opts;

#
# Parse the command line: -h host is required, -p is an optional flag, and
# no positional arguments are accepted.
#
if ( !getopts("ph:", \%opts) || @ARGV >= 1 || !defined($opts{h}) ) {
    print STDERR <<EOF;
usage: BackupPC_backupDuplicate -h host [-p]
  Options:
     -h host         host name
     -p              don't print progress information
EOF
    exit(1);
}

#
# Reject host names that contain unexpected characters, that contain a ".."
# path component, or that are missing from the host table read above.
#
if ( $opts{h} !~ /^([\w\.\s-]+)$/
        || $opts{h} =~ m{(^|/)\.\.(/|$)}
        || !defined($Hosts->{$opts{h}}) ) {
    print(STDERR "BackupPC_backupDuplicate: bad host name '$opts{h}'\n");
    exit(1);
}
my $Host = $opts{h};
#
# Load the per-host configuration; a defined return value from ConfigRead()
# is treated as an error message.
#
if ( defined(my $error = $bpc->ConfigRead($Host)) ) {
    print(STDERR "BackupPC_backupDuplicate: Can't read $Host's config file: $error\n");
    exit(1);
}
# re-fetch the configuration now that the host's config has been read
%Conf = $bpc->Conf();
BackupPC::XS::Lib::logLevelSet($Conf{XferLogLevel});
my $LogLevel = $Conf{XferLogLevel};     # also gates the print() diagnostics in this script
$bpc->ChildInit();

#
# Write new-style attrib files (<= 4.0.0beta3 uses old-style), which are 0-length
# files with the digest encoded in the file name (eg: attrib_md5HexDigest). We
# can still read the old-style files, and we upgrade them as we go.
#
BackupPC::XS::Attrib::backwardCompat(0, 0);

#
# Read the host's backup list and find the entry with the largest backup
# number: that is the backup that gets duplicated.
#
my @Backups = $bpc->BackupInfoRead($Host);

my($lastIdx, $lastNum, $newIdx, $newNum);

foreach my $idx ( 0 .. $#Backups ) {
    my $num = $Backups[$idx]{num};
    if ( !defined($lastNum) || $num > $lastNum ) {
        ($lastIdx, $lastNum) = ($idx, $num);
    }
}
if ( !defined($lastIdx) || @Backups == 0 ) {
    print(STDERR "BackupPC_backupDuplicate: no backups on host $Host\n");
    exit(1);
}

my $Compress = $Backups[$lastIdx]{compress};
my $SrcDir   = "$TopDir/pc/$Host/$lastNum";
my $Inode    = 1;
my $DestDir;

#
# The new entry goes at the end of the list.  Its number is the first one
# above $lastNum whose directory does not exist yet.
#
$newIdx = scalar(@Backups);
$newNum = $lastNum;
while ( 1 ) {
    $newNum++;
    $DestDir = "$TopDir/pc/$Host/$newNum";
    last if ( !-d $DestDir );
}

#
# Start handing out inodes above the largest inodeLast of any existing backup.
#
foreach my $info ( @Backups ) {
    if ( $info->{inodeLast} >= $Inode ) {
        $Inode = $info->{inodeLast} + 1;
    }
}

#
# The new entry starts as a copy of the last backup's info, with the fields
# that differ overridden.
#
$Backups[$newIdx] = { %{$Backups[$lastIdx]} };
@{$Backups[$newIdx]}{qw(num noFill compress version)}
                = ($newNum, 0, $Compress, scalar($bpc->Version()));

unless ( $opts{p} ) {
    print("__bpc_pidStart__ $$\n");
}

createFsckFile($DestDir);
$bpc->flushXSLibMesgs();

#
# Copy the last backup into $DestDir.  The method depends on the version
# recorded for the last backup: V4 trees are copied file by file, V3 trees
# are walked with BackupPC::View and rewritten via copy_v3_file().
#
if ( $Backups[$lastIdx]{version} >= 4 ) {
    #
    # Duplicate a V4 backup by copying #lastNum to #newNum.
    # We write directly to $DestDir.
    #
    if ( !$opts{p} ) {
        print("__bpc_progress_state__ copy #$lastNum -> #$newNum\n");
    }
    #
    # copy_v4_file() is handed paths relative to ".", so we have to be
    # in the source backup directory for them to resolve.
    #
    if ( !chdir($SrcDir) ) {
        print(STDERR "BackupPC_backupDuplicate: cannot chdir to $SrcDir\n");
        print("__bpc_pidEnd__ $$\n") if ( !$opts{p} );
        exit(1);
    }
    #
    # initially set the prior backup to unfilled, which we will update
    # once we finish successfully.
    #
    $Backups[$lastIdx]{noFill} = 1;
    print("Copying backup #$lastNum to #$newNum\n") if ( $LogLevel >= 1 );
    # handle the top-level directory itself, then everything found below it
    copy_v4_file(".", ".");
    # NOTE(review): the meaning of the trailing 1 passed to find() is not visible here
    BackupPC::DirOps::find($bpc, {wanted => \&copy_v4_file}, ".", 1);
    $Backups[$lastIdx]{noFill} = 0 if ( $ErrorCnt == 0 );
    # final progress line; the true argument labels the count as directories
    printProgress(1);
} else {
    #
    # Duplicate a V3 backup, and turn it into a filled V4 backup.
    # We write directly to $DestDir.
    #
    if ( !$opts{p} ) {
        print("__bpc_progress_state__ copy #$lastNum -> #$newNum\n");
    }
    my $view = BackupPC::View->new($bpc, $Host, \@Backups);
    #
    # The statistics for the new backup are rebuilt from scratch by
    # copy_v3_file() as it processes each entry.
    #
    $Backups[$newIdx]{nFiles}        = 0;
    $Backups[$newIdx]{size}          = 0;
    $Backups[$newIdx]{nFilesExist}   = 0;
    $Backups[$newIdx]{sizeExist}     = 0;
    $Backups[$newIdx]{sizeExistComp} = 0;
    $Backups[$newIdx]{nFilesNew}     = 0;
    $Backups[$newIdx]{sizeNew}       = 0;
    $Backups[$newIdx]{sizeNewComp}   = 0;

    # one delta reference-count object is shared by every share of the new backup
    my $deltaInfo = BackupPC::XS::DeltaRefCnt::new($DestDir);

    print("Copying v3 backup #$lastNum to v4 #$newNum\n") if ( $LogLevel >= 1 );
    foreach my $shareName ( $view->shareList($lastNum) ) {
        # each share gets its own attribute cache, tied to the new backup number
        my $ac = BackupPC::XS::AttribCache::new($Host, $newNum, $shareName, $Compress);
        $ac->setDeltaInfo($deltaInfo);
        $bpc->flushXSLibMesgs();
        #
        # Walk the share, calling copy_v3_file() with $ac and $deltaInfo as the
        # extra arguments; a true return from find() is treated as a bad share.
        # Note the cache is not flushed for a share that fails here.
        #
        if ( $view->find($lastNum, $shareName, "/", 0, \&copy_v3_file, $ac, $deltaInfo) ) {
            print(STDERR "BackupPC_backupDuplicate: bad share '$shareName'\n");
            $ErrorCnt++;
            next;
        }
        $ac->flush(1);
    }
    # record the inode high-water mark so later backups allocate above it
    $Backups[$newIdx]{inodeLast} = $Inode + 1;
    $deltaInfo->flush();
    printProgress();
}

#
# Merge in backups (in case the list changed while we were copying):
# re-read the backup list and append our new entry to the fresh copy.
#
# $newIdx is re-pointed at the slot we append to, because the re-read list
# need not have the same length as the list we started with.  Using the stale
# index would hand backupInfoWrite() the wrong entry (or an empty one).
#
my %newBackupInfo = %{$Backups[$newIdx]};
my @newBackups    = $bpc->BackupInfoRead($Host);
$newIdx              = scalar(@newBackups);
$newBackups[$newIdx] = {%newBackupInfo};
@Backups             = @newBackups;
$bpc->BackupInfoWrite($Host, @Backups);
BackupPC::Storage->backupInfoWrite("$TopDir/pc/$Host",
                                    $Backups[$newIdx]{num}, $Backups[$newIdx], 1);

#
# Fold in errors counted by the XS library, and only remove the needFsck
# marker (created by createFsckFile) if everything succeeded.
#
$ErrorCnt += BackupPC::XS::Lib::logErrorCntGet();
$bpc->flushXSLibMesgs();
unlink("$DestDir/refCnt/needFsck.dup") if ( $ErrorCnt == 0 );

#
# Run BackupPC_refCountUpdate for this host, passing -p through if we were
# given it.  The list form of system() is used so the host name (which may
# legitimately contain white space) is passed as a single argument and is
# never interpreted by a shell.
#
my @refCountCmd = ("$BinDir/BackupPC_refCountUpdate", "-h", $Host, "-o", "0");
push(@refCountCmd, "-p") if ( $opts{p} );
$ErrorCnt++ if ( system(@refCountCmd) != 0 );

print("BackupPC_backupDuplicate: got $ErrorCnt errors and $FileErrorCnt file open errors\n");
print("__bpc_pidEnd__ $$\n") if ( !$opts{p} );
exit($ErrorCnt ? 1 : 0);

#
# Copy one entry of the V4 source tree into $DestDir.
#
# Called once directly for "." and then as the "wanted" callback of
# BackupPC::DirOps::find().  $path is relative to the current directory
# (the source backup directory); $name is only used for logging.
#
# Directories are created in the destination and counted in $FileCnt;
# anything else is copied with File::Copy::copy().  Failures are reported
# on STDERR and counted in $ErrorCnt.
#
sub copy_v4_file
{
    my($name, $path) = @_;
    my $destPath = "$DestDir/$path";

    if ( $LogLevel >= 5 ) {
        printf("Got path = %s, name = %s, e,d,f = %d,%d,%d\n",
               $path, $name, -e $path, -d $path, -f $path);
    }

    if ( -d $path ) {
        if ( $LogLevel >= 3 ) {
            print("Creating directory $DestDir/$path\n");
        }
        # mkpath croaks on failure, so trap it and just count the error
        eval { mkpath($destPath, 0, 0777) };
        if ( $@ ) {
            print(STDERR "Can't mkpath $DestDir/$path\n");
            $ErrorCnt++;
        }
        #
        # $FileCnt is really a directory count here: getting a true file
        # count would mean opening every attrib file, which is expensive
        #
        $FileCnt++;
        return;
    }

    unless ( copy($path, $destPath) ) {
        print(STDERR "Can't copy $path to $DestDir/$path\n");
        $ErrorCnt++;
    }
    printProgress(1) if ( $FileCnt >= $FileCntNext );
}

#
# Read a modest-sized file
#
sub readFile
{
    #
    # Returns up to 65536 bytes read from $path via BackupPC::XS::FileZIO,
    # opened with the $Compress setting of the backup being copied.
    #
    # If the open or the read fails, a message goes to STDERR, $FileErrorCnt
    # is incremented and nothing is returned.
    #
    my($path) = @_;
    my $contents;
    my $zio    = BackupPC::XS::FileZIO::open($path, 0, $Compress);
    my $readOk = defined($zio) && !($zio->read(\$contents, 65536) < 0);

    if ( $readOk ) {
        $zio->close();
        return $contents;
    }

    print(STDERR "Unable to open/read file $path\n");
    $zio->close() if ( defined($zio) );
    $FileErrorCnt++;
    return;
}

#
# Callback for BackupPC::View::find(): convert one entry of a V3 backup
# into the new V4 backup.
#
#   $a          attribute hash for the entry (relPath, fullPath, type, ...)
#   $ac         BackupPC::XS::AttribCache for the new backup's share
#   $deltaInfo  BackupPC::XS::DeltaRefCnt used to record pool references
#
# For each entry we assign an inode and link count, make sure file contents
# are in the V4 pool (moving the file over from the V3 pool when it can be
# found there, otherwise writing it to the pool), update the statistics in
# $Backups[$newIdx], and store the attributes in $ac.
#
# Returns -1 if a directory can't be created; otherwise the return value
# carries no meaning.  Errors are counted in $ErrorCnt / $FileErrorCnt.
#
sub copy_v3_file
{
    my($a, $ac, $deltaInfo) = @_;
    my $fileName = $a->{relPath};
    my $copyFile = 0;

    print("copy_v3_file: $a->{relPath}\n") if ( $LogLevel >= 6 );

    if ( $a->{type} == BPC_FTYPE_DIR ) {
        eval { mkpath($a->{fullPath}, 0, 0777) };
        if ( $@ ) {
            print("Can't create $a->{fullPath}\n");
            $ErrorCnt++;
            return -1;
        }
    } else {
        # only non-directory entries count toward progress
        printProgress() if ( ++$FileCnt >= $FileCntNext );
    }
    # these types have contents that need to be in the pool
    $copyFile = 1 if ( $a->{type} == BPC_FTYPE_FILE
                    || $a->{type} == BPC_FTYPE_SYMLINK
                    || $a->{type} == BPC_FTYPE_CHARDEV
                    || $a->{type} == BPC_FTYPE_BLOCKDEV );
    #
    # TODO: confirm charset for v3 symlink contents: do we need to convert to utf8?
    #
    # assign inodes; handle hardlinks and nlinks
    #
    if ( $a->{type} == BPC_FTYPE_HARDLINK ) {
        #
        # old-style hardlink: the file gives the path of the linked-to file,
        # which we might or might not have processed yet.
        #
        my $target = readFile($a->{fullPath});
        if ( !defined($target) ) {
            #
            # readFile() has already reported and counted the failure.
            # Without a target we must not continue: an undefined target
            # would be normalized to "/" below, and we would then write
            # this entry's attributes onto the top-level directory.
            #
            return;
        }
        # normalize the target so it starts with exactly one "/"
        $target =~ s{^\.?/+}{/};
        $target = "/$target" if ( $target !~ m{^/} );

        print("Got hardlink from $a->{relPath} to $target\n") if ( $LogLevel >= 5 );
        if ( defined(my $aCurr = $ac->get($target)) ) {
            #
            # Either the target file has been processed, or another hardlink
            # has created this target entry.  Copy the attributes and increment
            # the link count.  Note that normal files have a link count of 0,
            # so if the link count is 0, we set it to 2.
            #
            my $origRelPath = $a->{relPath};
            $a = $aCurr;
            if ( $a->{nlinks} == 0 ) {
                $a->{nlinks} = 2;
            } else {
                $a->{nlinks}++;
            }
            $ac->set($origRelPath, $a);
            $ac->set($target, $a);
            print("target $target exists, set both inode = $a->{inode}, nlinks = $a->{nlinks}\n")
                                         if ( $LogLevel >= 5 );
            return;
        } else {
            #
            # Target file not processed.  Just set attributes with one link,
            # so that an inode is created for us and target.
            #
            $a->{type} = BPC_FTYPE_FILE;
            $a->{nlinks} = 1;
            $a->{inode} = $Inode++;
            $ac->set($a->{relPath}, $a);
            $ac->set($target, $a);
            print("target $target doesn't exist, set both inode = $a->{inode}, nlinks = $a->{nlinks}\n")
                                         if ( $LogLevel >= 5 );
            return;
        }
    } else {
        my $aCurr = $ac->get($a->{relPath});
        if ( defined($aCurr) ) {
            #
            # we are processing the target of other hardlinks; keep the inode
            # and increment the link count.
            #
            $a->{inode}  = $aCurr->{inode};
            $a->{nlinks} = $aCurr->{nlinks} + 1;
        } else {
            #
            # new non-hardlink has a new inode, and no links
            #
            $a->{inode} = $Inode++;
            $a->{nlinks} = 0;
        }
    }

    if ( $copyFile ) {
        my $f = BackupPC::XS::FileZIO::open($a->{fullPath}, 0, $Compress);
        my($size, $data, $fileSize, $found, @s, $thisRead);
        my $data1MB = "";
        if ( !defined($f) ) {
            print(STDERR "Unable to open file $a->{fullPath}\n");
            $FileErrorCnt++;
            return;
        }
        #
        # First pass over the file: compute the full-file MD5 (the V4 digest)
        # and the uncompressed size.
        #
        my $md5 = Digest::MD5->new();
        do {
            $thisRead = $f->read(\$data, 1 << 20);
            if ( $thisRead < 0 ) {
                print(STDERR "error reading file $a->{fullPath} (read returns $thisRead)\n");
                $ErrorCnt++;
                $f->close();
                return;
            } elsif ( $thisRead > 0 ) {
                $md5->add($data);
                $fileSize += length($data);
                #
                # Keep (at least) the leading 1MB of the file: that buffer is
                # what Buffer2MD5_v3() is given, together with the file size,
                # to compute the V3 pool digest.  A read can return less than
                # was asked for, so keep appending until we have 1MB.
                #
                $data1MB .= $data if ( length($data1MB) < (1 << 20) );
            }
        } while ( $thisRead > 0 );
        # inode of the V3 backup file; a pool file with the same inode is the same file
        my $inode = (stat($a->{fullPath}))[1];
        my $digest4 = $md5->digest();
        my $digest3 = $bpc->Buffer2MD5_v3(Digest::MD5->new(), $fileSize, \$data1MB);
        my $path3 = $bpc->MD52Path_v3($digest3, $Compress);
        my $path4 = $bpc->MD52Path($digest4, $Compress);
        my $i = -1;

        print("$a->{relPath}: path3 = $path3, path4 = $path4\n") if ( $LogLevel >= 5 );

        #
        # see if it's already in the v4 pool
        #
        if ( $fileSize == 0 ) {
            $found = 1;
            print("$a->{relPath}: empty file in pool by default\n") if ( $LogLevel >= 4 );
        } elsif ( (@s = stat($path4)) && $s[1] == $inode ) {
            $found = 1;
            print("$a->{relPath}: already in pool4 $path4\n") if ( $LogLevel >= 4 );
        } elsif ( !-f $path4 ) {
            #
            # look in v3 pool for match: try $path3, then $path3_0, $path3_1, ...
            # until a candidate doesn't exist or one has our inode.
            #
            while ( 1 ) {
                my $testPath = $path3;
                $testPath .= "_$i" if ( $i >= 0 );
                @s = stat($testPath);
                last if ( !@s );
                if ( $s[1] != $inode ) {
                    $i++;
                    next;
                }
                #
                # Found it!
                #
                print("$a->{relPath}: found at $testPath; link/move to $path4\n") if ( $LogLevel >= 4 );
                $found = 1;
                my $dir4 = $path4;
                $dir4 =~ s{(.*)/.*}{$1};
                if ( !-d $dir4 ) {
                    print("Creating directory $dir4\n") if ( $LogLevel >= 5 );
                    eval { mkpath($dir4, 0, 0777) };
                    if ( $@ ) {
                        print(STDERR "Can't mkpath $dir4\n");
                        $ErrorCnt++;
                        $found = 0;
                    }
                }
                # move the pool file by linking it into the v4 pool and removing the v3 name
                if ( !link($testPath, $path4) ) {
                    print(STDERR "Can't link($testPath,$path4)\n");
                    $ErrorCnt++;
                    $found = 0;
                } elsif ( unlink($testPath) != 1 ) {
                    print(STDERR "Can't unlink($testPath)\n");
                    $ErrorCnt++;
                    $found = 0;
                }
                last;
            }
        }
        #
        # check one more time for the V4 pool in case someone else just moved it there
        # (in which case the link error above is actually not an error).
        #
        if ( !$found && (@s = stat($path4)) && $s[1] == $inode ) {
            $found = 1;
            print("$a->{relPath}: rediscovered in pool4 $path4; prior link error is benign\n");
            $ErrorCnt-- if ( $ErrorCnt > 0 );        # reverse increment above
        }

        if ( $found ) {
            #
            # contents are already in the V4 pool: just reference them
            #
            $f->close();
            $a->{digest} = $digest4;
            $deltaInfo->update($Compress, $digest4, 1) if ( length($digest4) );
            $Backups[$newIdx]{nFiles}++;
            $Backups[$newIdx]{nFilesExist}++;
            if ( $a->{type} == BPC_FTYPE_FILE || $a->{type} == BPC_FTYPE_SYMLINK ) {
                $Backups[$newIdx]{size}          += $fileSize;
                $Backups[$newIdx]{sizeExist}     += $fileSize;
                # $s[7] is the size from the last stat() above, ie the pool file
                $Backups[$newIdx]{sizeExistComp} += $s[7];
            }
        } else {
            #
            # fall back on copying it to the new pool
            #
            $f->rewind();
            my $pw = BackupPC::XS::PoolWrite::new($Compress);
            while ( $f->read(\$data, 1 << 20) > 0 ) {
                $pw->write(\$data);
                $size += length($data);
            }
            $f->close();
            my($match, $digest, $poolSize, $errorCnt) = $pw->close();
            if ( $LogLevel >= 5 ) {
                my $digestStr = unpack("H*", $digest);
                print("poolWrite->close $fileName: returned match $match, digest $digestStr, poolSize $poolSize, errCnt $errorCnt\n");
            }
            $ErrorCnt += $errorCnt;
            $a->{digest} = $digest;
            $deltaInfo->update($Compress, $digest, 1) if ( length($digest) );
            $bpc->flushXSLibMesgs();
            $Backups[$newIdx]{nFiles}++;
            if ( $match ) {
                $Backups[$newIdx]{nFilesExist}++;
            } else {
                $Backups[$newIdx]{nFilesNew}++;
            }
            if ( $a->{type} == BPC_FTYPE_FILE || $a->{type} == BPC_FTYPE_SYMLINK ) {
                $Backups[$newIdx]{size} += $size;
                if ( $match ) {
                    $Backups[$newIdx]{sizeExist}     += $size;
                    $Backups[$newIdx]{sizeExistComp} += $poolSize;
                } else {
                    $Backups[$newIdx]{sizeNew}       += $size;
                    $Backups[$newIdx]{sizeNewComp}   += $poolSize;
                }
            }
        }
    } elsif ( $a->{type} != BPC_FTYPE_DIR ) {
        # entries without pool contents still count as (existing) files
        $Backups[$newIdx]{nFiles}++;
        $Backups[$newIdx]{nFilesExist}++;
    }
    print("setting $a->{relPath} attrib (type $a->{type}, inode $a->{inode}, nlinks $a->{nlinks})\n") if ( $LogLevel >= 5 );
    $ac->set($a->{relPath}, $a);
    $bpc->flushXSLibMesgs();
}

#
# Create a needFsck file, so if we are killed and can't recover, we can
# make sure an fsck is run next time.
#
sub createFsckFile
{
    #
    # Creates the empty marker file $destDir/refCnt/needFsck.dup, making
    # the refCnt directory first if needed.
    #
    # A failure to create the marker is reported (with the OS error text)
    # but is not fatal.  Note that mkpath itself croaks if it fails.
    #
    my($destDir) = @_;
    my $fsckFile = "$destDir/refCnt/needFsck.dup";

    mkpath("$destDir/refCnt", 0, 0777);
    my $needFsckFH;
    if ( !(open($needFsckFH, ">", $fsckFile) && close($needFsckFH)) ) {
        #
        # open/close report their failure in $!; $? is the status of the
        # last child process and says nothing about this error.
        #
        print("BackupPC_backupDuplicate: can't create $fsckFile ($!)\n");
    }
}

sub printProgress
{
    #
    # Schedules the next progress report 100 entries from now and, unless
    # -p was given, prints the current count.  A true $dirCnt marks the
    # count as a directory count.
    #
    my($dirCnt) = @_;

    $FileCntNext = $FileCnt + 100;
    return if ( $opts{p} );

    my $suffix = $dirCnt ? " dirs" : "";
    print("__bpc_progress_fileCnt__ $FileCnt$suffix\n");
}
