#!/usr/bin/perl
#============================================================= -*-perl-*-
#
# BackupPC_tarCreate: create a tar archive of an existing dump
# for restore on a client.
#
# DESCRIPTION
#  
#   Usage: BackupPC_tarCreate [options] files/directories...
#
#   Flags:
#     Required options:
#
#       -h host         Host from which the tar archive is created.
#       -n dumpNum      Dump number from which the tar archive is created.
#                       A negative number means relative to the end (eg -1
#                       means the most recent dump, -2 2nd most recent etc).
#       -s shareName    Share name from which the tar archive is created.
#
#     Other options:
#       -t              print summary totals
#       -r pathRemove   path prefix that will be replaced with pathAdd
#       -p pathAdd      new path prefix
#       -b BLOCKS       output write buffer size in 512-byte blocks (default 20; same as tar)
#       -w readBufSz    buffer size for reading files (default 1048576 = 1MB)
#       -e charset      charset for encoding file names (default: value of
#                       $Conf{ClientCharset} when backup was done)
#       -l              just print a file listing; don't generate an archive
#       -L              just print a detailed file listing; don't generate an archive
#
#     The -h, -n and -s options specify which dump is used to generate
#     the tar archive.  The -r and -p options can be used to relocate
#     the paths in the tar archive so extracted files can be placed
#     in a location different from their original location.
#
# AUTHOR
#   Craig Barratt  <cbarratt@users.sourceforge.net>
#
# COPYRIGHT
#   Copyright (C) 2001-2017  Craig Barratt
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#========================================================================
#
# Version 4.0.0, released 3 Mar 2017.
#
# See http://backuppc.sourceforge.net.
#
#========================================================================

use strict;
no  utf8;
use lib "/usr/share/backuppc/lib";
use File::Path;
use Getopt::Std;
use Encode qw/from_to/;
use Data::Dumper;

use BackupPC::Lib;
use BackupPC::XS qw( :all );
use BackupPC::View;

#
# Create the BackupPC library object; nothing below can run without it.
#
my $bpc = BackupPC::Lib->new;
if ( !$bpc ) {
    die("BackupPC::Lib->new failed\n");
}

#
# Parse the command line into %opts.  Option parsing must succeed and
# at least one file/directory argument must remain in @ARGV; otherwise
# print the usage text to STDERR and exit with status 1.
#
my %opts;
my $optsOk = getopts("Llte:h:n:p:r:s:b:w:", \%opts);

if ( !$optsOk || @ARGV < 1 ) {
    print STDERR <<EOF;
usage: $0 [options] files/directories...
  Required options:
     -h host         host from which the tar archive is created
     -n dumpNum      dump number from which the tar archive is created
                     A negative number means relative to the end (eg -1
                     means the most recent dump, -2 2nd most recent etc).
     -s shareName    share name from which the tar archive is created

  Other options:
     -t              print summary totals
     -r pathRemove   path prefix that will be replaced with pathAdd
     -p pathAdd      new path prefix
     -b BLOCKS       output write buffer size in 512-byte blocks (default 20; same as tar)
     -w readBufSz    buffer size for reading files (default 1048576 = 1MB)
     -e charset      charset for encoding file names (default: value of
                     \$Conf{ClientCharset} when backup was done)
     -l              just print a file listing; don't generate an archive
     -L              just print a detailed file listing; don't generate an archive
EOF
    exit(1);
}

#
# Host name: only word characters, dots, whitespace and dashes are
# accepted, and a ".." path component is rejected.
#
if ( $opts{h} !~ /^([\w\.\s-]+)$/
        || $opts{h} =~ m{(^|/)\.\.(/|$)} ) {
    print(STDERR "$0: bad host name '$opts{h}'\n");
    exit(1);
}
my $Host = $opts{h};

#
# Dump number: an optionally negative integer.
#
if ( $opts{n} !~ /^(-?\d+)$/ ) {
    print(STDERR "$0: bad dump number '$opts{n}'\n");
    exit(1);
}
my $Num = $opts{n};

#
# Backup list for the host, plus the counters reported by -t and used
# for the exit status.
#
my @Backups = $bpc->BackupInfoRead($Host);
my $FileCnt = 0;
my $ByteCnt = 0;
my $DirCnt = 0;
my $SpecialCnt = 0;
my $ErrorCnt = 0;

#
# A negative dump number is relative to the end of @Backups (-1 is the
# last entry); translate it to that entry's real backup number.  Then
# locate the index $i of the backup whose number is $Num.
#
my $i;
$Num = $Backups[@Backups + $Num]{num} if ( -@Backups <= $Num && $Num < 0 );
for ( $i = 0 ; $i < @Backups ; $i++ ) {
    last if ( $Backups[$i]{num} == $Num );
}
if ( $i >= @Backups ) {
    print(STDERR "$0: bad backup number $Num for host $Host\n");
    exit(1);
}

#
# File name charset: the one recorded with the backup unless -e
# overrides it.  Backups with no version, or a 2.x/3.x version, are
# flagged as pre-V4.
#
my $Charset = $Backups[$i]{charset};
$Charset    = $opts{e} if ( $opts{e} ne "" );
my $PreV4   = ($Backups[$i]{version} eq "" || $Backups[$i]{version} =~ /^[23]\./) ? 1 : 0;

#
# Path relocation prefixes; both stay undef when the option is absent
# or empty.  These are initialised with a ternary rather than
# "my $x = $1 if (...)", since perlsyn documents a "my" combined with
# a statement modifier as undefined behaviour.
#
my $PathRemove = ( $opts{r} =~ /(.+)/ ) ? $1 : undef;
my $PathAdd    = ( $opts{p} =~ /(.+)/ ) ? $1 : undef;
if ( $opts{s} =~ m{(^|/)\.\.(/|$)} ) {
    print(STDERR "$0: bad share name '$opts{s}'\n");
    exit(1);
}

#
# $ShareName and $view are package variables ("our"): the main loop
# uses $ShareName as a foreach loop variable, and the subroutines
# read both.
#
our $ShareName = $opts{s};
our $view = BackupPC::View->new($bpc, $Host, \@Backups);

#
# This constant and the line of code below that uses it are borrowed
# from Archive::Tar.  Thanks to Calle Dybedahl and Stephen Zander.
# See www.cpan.org.
#
# Archive::Tar is Copyright 1997 Calle Dybedahl. All rights reserved.
#                 Copyright 1998 Stephen Zander. All rights reserved.
#
my $tar_pack_header
    = 'a100 a8 a8 a8 a12 a12 A8 a1 a100 a6 a2 a32 a32 a8 a8 a155 x12';
my $tar_header_length = 512;

#
# -b and -w must be plain non-negative decimal integers.  Anything else
# (eg "abc", "-1", "2.5", "00x") would give a zero, negative or
# non-512-aligned write buffer size, which makes TarWrite loop forever
# or produce a misaligned archive, or a useless read size.  A value
# of 0 selects the default, as before.
#
foreach my $opt ( qw(b w) ) {
    next if ( !defined($opts{$opt}) || $opts{$opt} =~ /\A\d+\z/ );
    print(STDERR "$0: bad value '$opts{$opt}' for option -$opt\n");
    exit(1);
}

#
# The "+ 0" numifies the option so that strings like "00" (true as a
# string, but numerically zero) also fall back to the default.
#
my $BufSize    = ($opts{w} + 0) || 1048576;     # 1MB or 2^20
my $WriteBuf   = "";
my $WriteBufSz = (($opts{b} + 0) || 20) * $tar_header_length;

#
# Caches for uid/gid -> name lookups, and the hardlink bookkeeping
# shared by TarWriteFile and archiveWriteHardLinks.
#
my(%UidCache, %GidCache);
my(%HardLinkExtraFiles, @HardLinks, %Inode2File);

#
# Write out all the requested files/directories
#
binmode(STDOUT);
my $fh = *STDOUT;
if ( $ShareName ne "*" ) {
    #
    # Single share: write each requested path, then any deferred
    # hardlinks.
    #
    foreach my $dir ( @ARGV ) {
        archiveWrite($fh, $dir);
    }
    archiveWriteHardLinks($fh);
} else {
    #
    # All shares: repeat the requested paths for every share in the
    # backup, relocating each share's files under /<share>/<pathAdd>.
    #
    my $PathRemoveOrig = $PathRemove;
    my $PathAddOrig    = $PathAdd;
    foreach $ShareName ( $view->shareList($Num) ) {
        #print(STDERR "Doing share ($ShareName)\n");
        if ( !defined($PathRemoveOrig) ) {
            $PathRemove = "/";
        }
        $PathAdd = "/$ShareName/$PathAddOrig";
        $PathAdd =~ s{//+}{/}g;
        foreach my $dir ( @ARGV ) {
            archiveWrite($fh, $dir);
        }
        archiveWriteHardLinks($fh);
    }
}

unless ( $opts{l} || $opts{L} ) {
    #
    # A real archive ends with two all-zero 512 byte headers; the
    # final TarWrite(undef) then pads the output to a full write block.
    #
    my $trailer = "\0" x ($tar_header_length * 2);
    TarWrite($fh, \$trailer);
    TarWrite($fh, undef);
}

#
# Summary totals, only when -t was given.
#
if ( $opts{t} ) {
    print STDERR "Done: $FileCnt files, $ByteCnt bytes, $DirCnt dirs,"
               . " $SpecialCnt specials, $ErrorCnt errors\n";
}

#
# Errors with no file or directory written at all give a non-zero
# exit status; anything else exits 0.
#
exit(1) if ( $ErrorCnt && !$FileCnt && !$DirCnt );
exit(0);

###########################################################################
# Subroutines
###########################################################################

sub archiveWrite
{
    my($fh, $dir, $tarPathOverride) = @_;

    #
    # Write $dir from the current backup ($Num) and share ($ShareName)
    # by handing TarWriteFile to $view->find() as the per-entry
    # callback; $fh and $tarPathOverride are passed through to it.
    # Failures are reported on STDERR and counted in $ErrorCnt.
    #

    #
    # Never accept a path with a ".." component.
    #
    if ( $dir =~ m{(^|/)\.\.(/|$)} ) {
        print(STDERR "$0: bad directory '$dir'\n");
        $ErrorCnt++;
        return;
    }

    # "." means the top of the share
    $dir = "/" if ( $dir eq "." );

    #print(STDERR "calling find with $Num, $ShareName, $dir\n");
    my $status = $view->find($Num, $ShareName, $dir, 0, \&TarWriteFile,
                             $fh, $tarPathOverride);
    if ( $status < 0 ) {
        print(STDERR "$0: bad share or directory '$ShareName/$dir'\n");
        $ErrorCnt++;
        return;
    }
}

#
# Write out any hardlinks (if any); only for <= 3.x backups.
#
sub archiveWriteHardLinks
{
    my($fh) = @_;

    #
    # Emit the hardlink headers queued in @HardLinks by TarWriteFile,
    # then reset the hardlink bookkeeping.  Does nothing unless the
    # backup is pre-V4.
    #
    return if ( !$PreV4 );

    foreach my $link ( @HardLinks ) {
        $link->{size} = 0;

        #
        # If the link target was itself dumped under another archive
        # name (recorded in %HardLinkExtraFiles), point the link there.
        #
        (my $target = $link->{linkname}) =~ s{^\./}{/};
        my $dumpedAs = $HardLinkExtraFiles{$target};
        $link->{linkname} = $dumpedAs if ( defined($dumpedAs) );

        #
        # Apply the -r/-p relocation to the link target; here the
        # target carries a leading "." in front of the path.
        #
        if ( defined($PathRemove) ) {
            my $prefixLen = length($PathRemove) + 1;
            if ( substr($link->{linkname}, 0, $prefixLen) eq ".$PathRemove" ) {
                substr($link->{linkname}, 0, $prefixLen) = ".$PathAdd";
            }
        }
        TarWriteFileInfo($fh, $link);
    }
    @HardLinks = ();
    %HardLinkExtraFiles = ();
}

#
# Return the user name for numeric $uid (undef if getpwuid finds none).
# Every answer, including a failed lookup, is remembered in %UidCache
# so getpwuid is called at most once per uid.
#
sub UidLookup
{
    my($uid) = @_;

    if ( !exists($UidCache{$uid}) ) {
        my @pwEntry = getpwuid($uid);
        $UidCache{$uid} = $pwEntry[0];
    }
    return $UidCache{$uid};
}

#
# Return the group name for numeric $gid (undef if getgrgid finds none).
# Every answer, including a failed lookup, is remembered in %GidCache
# so getgrgid is called at most once per gid.
#
sub GidLookup
{
    my($gid) = @_;

    if ( !exists($GidCache{$gid}) ) {
        my @grEntry = getgrgid($gid);
        $GidCache{$gid} = $grEntry[0];
    }
    return $GidCache{$gid};
}

#
# Write every byte of $$bufRef to $fh with syswrite.  syswrite may
# write fewer bytes than requested, or be interrupted by a signal, so
# keep going from where it stopped until the whole buffer is out.  A
# real error (or a write that makes no progress) is fatal: report it
# on STDERR and exit with status 1.
#
sub TarWriteAllBytes
{
    my($fh, $bufRef) = @_;

    my $len    = length($$bufRef);
    my $offset = 0;
    while ( $offset < $len ) {
        my $n = syswrite($fh, $$bufRef, $len - $offset, $offset);
        next if ( !defined($n) && $!{EINTR} );
        if ( !$n ) {
            print(STDERR "Unable to write to output file ($!)\n");
            exit(1);
        }
        $offset += $n;
    }
}

#
# Buffered output to the archive.  Data is accumulated in $WriteBuf and
# written to $fh only in records of exactly $WriteBufSz bytes.
#
#   $fh       output file handle
#   $dataRef  reference to the bytes to append, or undef to flush:
#             the buffer is then padded with NULs to a full record
#             and written.
#
# Exits the program (status 1) if the output cannot be written.
#
sub TarWrite
{
    my($fh, $dataRef) = @_;

    if ( !defined($dataRef) ) {
        #
        # do flush by padding to a full $WriteBufSz
        #
        my $data = "\0" x ($WriteBufSz - length($WriteBuf));
        $dataRef = \$data;
    }
    if ( length($WriteBuf) + length($$dataRef) < $WriteBufSz ) {
        #
        # just buffer and return
        #
        $WriteBuf .= $$dataRef;
        return;
    }

    #
    # First record: whatever is buffered plus enough new data to
    # complete it.
    #
    my $done  = $WriteBufSz - length($WriteBuf);
    my $first = $WriteBuf . substr($$dataRef, 0, $done);
    TarWriteAllBytes($fh, \$first);

    #
    # Then as many complete records as the new data still holds.
    #
    while ( $done + $WriteBufSz <= length($$dataRef) ) {
        my $record = substr($$dataRef, $done, $WriteBufSz);
        TarWriteAllBytes($fh, \$record);
        $done += $WriteBufSz;
    }

    # keep the incomplete tail for next time
    $WriteBuf = substr($$dataRef, $done);
}

#
# After $size bytes of payload, write the NULs needed to bring the
# payload up to a multiple of the 512 byte tar block size.  Writes
# nothing when $size is already a multiple.
#
sub TarWritePad
{
    my($fh, $size) = @_;

    my $partial = $size % $tar_header_length;
    return if ( !$partial );

    my $zeros = "\0" x ($tar_header_length - $partial);
    TarWrite($fh, \$zeros);
}

#
# Format a uid or gid for its 8 byte tar header field.  Values below
# 8**8 are written in octal exactly as before.  A larger value needs
# more than 8 octal digits, which pack's a8 would silently cut off and
# so store a wrong id; those are written using the GNU base-256
# extension instead (0x80 marker byte followed by the big-endian value),
# the same scheme the size field uses for very large files.
#
sub TarHeaderIdField
{
    my($id) = @_;

    return pack("C4 N", 0x80, 0, 0, 0, $id) if ( $id >= 8 ** 8 );
    return sprintf("%07o", $id);
}

#
# Build one 512 byte tar header from $hdr and queue it with TarWrite.
#
#   $fh   output file handle (passed on to TarWrite)
#   $hdr  hash ref; the fields read are name, mode, uid, gid, size,
#         mtime, type, linkname, magic, version, uname, gname,
#         devmajor and devminor.  uname/gname are filled in (and
#         stored back into $hdr) from the uid/gid when not defined.
#
# Only the first 99 bytes of name and linkname are stored here; the
# caller is responsible for emitting long-name records beforehand.
#
sub TarWriteHeader
{
    my($fh, $hdr) = @_;

    $hdr->{uname} = UidLookup($hdr->{uid}) if ( !defined($hdr->{uname}) );
    $hdr->{gname} = GidLookup($hdr->{gid}) if ( !defined($hdr->{gname}) );
    my $devmajor = defined($hdr->{devmajor}) ? sprintf("%07o", $hdr->{devmajor})
                                             : "";
    my $devminor = defined($hdr->{devminor}) ? sprintf("%07o", $hdr->{devminor})
                                             : "";
    my $sizeStr;
    if ( $hdr->{size} >= 2 * 65536 * 65536 ) {
        #
        # GNU extension for files >= 8GB: send size in big-endian binary.
        # The marker byte 0x80 is unsigned, hence "C" rather than "c".
        #
        $sizeStr = pack("C4 N N", 0x80, 0, 0, 0,
                                  $hdr->{size} / (65536 * 65536),
                                  $hdr->{size} % (65536 * 65536));
    } elsif ( $hdr->{size} >= 1 * 65536 * 65536 ) {
        #
        # sprintf octal only handles up to 2^32 - 1, so format the
        # upper 3 and lower 8 octal digits separately
        #
        $sizeStr = sprintf("%03o", $hdr->{size} / (1 << 24))
                 . sprintf("%08o", $hdr->{size} % (1 << 24));
    } else {
        $sizeStr = sprintf("%011o", $hdr->{size});
    }
    my $data = pack($tar_pack_header,
                     substr($hdr->{name}, 0, 99),
                     sprintf("%07o", $hdr->{mode}),
                     TarHeaderIdField($hdr->{uid}),
                     TarHeaderIdField($hdr->{gid}),
                     $sizeStr,
                     sprintf("%011o", $hdr->{mtime}),
                     "",        #checksum field - space padded by pack("A8")
                     $hdr->{type},
                     substr($hdr->{linkname}, 0, 99),
                     $hdr->{magic} || 'ustar ',
                     $hdr->{version} || ' ',
                     $hdr->{uname},
                     $hdr->{gname},
                     $devmajor,
                     $devminor,
                     ""         # prefix is empty
                 );
    #
    # The checksum is the 16 bit sum of all header bytes, computed while
    # the checksum field itself still holds spaces; it is then stored
    # as 6 octal digits plus a NUL at offset 148.
    #
    substr($data, 148, 7) = sprintf("%06o\0", unpack("%16C*",$data));
    TarWrite($fh, \$data);
}

#
# Emit the archive entry header for $hdr, or, with -l / -L, just print
# a listing line on STDOUT instead.
#
#   $fh   output file handle
#   $hdr  header hash ref; name and linkname are converted in place
#         from utf8 to $Charset when a charset is set.
#
# Names or link names longer than 99 bytes are preceded by a GNU
# "././@LongLink" record (type K for the link name, then type L for
# the name) carrying the full NUL-terminated string.
#
sub TarWriteFileInfo
{
    my($fh, $hdr) = @_;

    #
    # Convert path names to requested (eg: client) charset
    #
    if ( $Charset ne "" ) {
        foreach my $field ( qw(name linkname) ) {
            from_to($hdr->{$field}, "utf8", $Charset);
        }
    }

    #
    # -l: plain listing, one name per line
    #
    if ( $opts{l} ) {
        print($hdr->{name} . "\n");
        return;
    }

    #
    # -L: detailed listing with mode, uid/gid and size; symlinks also
    # show their target, and newlines in the name are shown as \n
    #
    if ( $opts{L} ) {
        my $shown = $hdr->{name};
        $shown .= " -> $hdr->{linkname}"
                        if ( $hdr->{type} == BPC_FTYPE_SYMLINK );
        $shown =~ s/\n/\\n/g;

        printf("%6o %9s %11.0f %s\n",
                    $hdr->{mode},
                    "$hdr->{uid}/$hdr->{gid}",
                    $hdr->{size},
                    $shown);
        return;
    }

    #
    # Long link names (type K) first, then long file names (type L)
    #
    foreach my $long ( [ "linkname", "K" ], [ "name", "L" ] ) {
        my($field, $longType) = @$long;

        next if ( length($hdr->{$field}) <= 99 );

        my $data = $hdr->{$field} . "\0";
        my %h = (
            name => "././\@LongLink",
            type => $longType,
            size => length($data),
        );
        TarWriteHeader($fh, \%h);
        TarWrite($fh, \$data);
        TarWritePad($fh, length($data));
    }
    TarWriteHeader($fh, $hdr);
}

#
# Write one backup entry to the archive.  archiveWrite passes this
# routine to $view->find() as the per-entry callback.
#
#   $hdr              attribute hash ref for the entry; the fields read
#                     here are relPath, fullPath, compress, type, size,
#                     nlinks and inode.  name (and, for links and
#                     devices, linkname / devmajor / devminor / size)
#                     are set on it.
#   $fh               output file handle
#   $tarPathOverride  optional; if defined it is used as the archive
#                     path in place of $hdr->{relPath}
#
# Updates the global counters $FileCnt, $ByteCnt, $DirCnt, $SpecialCnt
# and $ErrorCnt.  Problems are reported on STDERR and counted; they do
# not abort the run.
#
sub TarWriteFile
{
    my($hdr, $fh, $tarPathOverride) = @_;

    my $tarPath = $hdr->{relPath};
    $tarPath = $tarPathOverride if ( defined($tarPathOverride) );

    #
    # Apply the -r/-p relocation and make the archive path start
    # with "./", collapsing any repeated slashes.
    #
    $tarPath =~ s{//+}{/}g;
    if ( defined($PathRemove)
            && substr($tarPath, 0, length($PathRemove)) eq $PathRemove ) {
        substr($tarPath, 0, length($PathRemove)) = $PathAdd;
    }
    $tarPath = "./" . $tarPath if ( $tarPath !~ /^\.\// );
    $tarPath =~ s{//+}{/}g;
    $hdr->{name} = $tarPath;

    if ( !$PreV4 && $hdr->{nlinks} > 0 && defined($hdr->{inode}) ) {
        if ( defined($Inode2File{$hdr->{inode}}) ) {
            #
            # Later inodes: emit a hardlink to an existing file in the archive
            # TODO: do path rewrite on link path?
            #
            $hdr->{size} = 0;
            $hdr->{type} = BPC_FTYPE_HARDLINK;
            $hdr->{linkname} = $Inode2File{$hdr->{inode}}{name};
            TarWriteFileInfo($fh, $hdr);
            return;
        } else {
            #
            # First time: remember the data for this inode and dump
            # the file in its original form.
            #
            $Inode2File{$hdr->{inode}} = { %$hdr };
        }
    }

    if ( $hdr->{type} == BPC_FTYPE_DIR ) {
        #
        # Directory: just write the header
        #
        $hdr->{name} .= "/" if ( $hdr->{name} !~ m{/$} );
        TarWriteFileInfo($fh, $hdr);
        $DirCnt++;
    } elsif ( $hdr->{type} == BPC_FTYPE_FILE ) {
        my $data;
        my $size = 0;
        #
        # Regular file: write the header and file
        #
        my $f = BackupPC::XS::FileZIO::open($hdr->{fullPath}, 0, $hdr->{compress});
        if ( !defined($f) ) {
            print(STDERR "Unable to open file $hdr->{fullPath} (for $hdr->{name})\n");
            $ErrorCnt++;
            return;
        }
        TarWriteFileInfo($fh, $hdr);
        if ( $opts{l} || $opts{L} ) {
            $size = $hdr->{size};
        } else {
            while ( $f->read(\$data, $BufSize) > 0 ) {
                my $truncated = 0;
                if ( $size + length($data) > $hdr->{size} ) {
                    print(STDERR "Error: truncating $hdr->{fullPath} to"
                               . " $hdr->{size} bytes\n");
                    $data = substr($data, 0, $hdr->{size} - $size);
                    $ErrorCnt++;
                    $truncated = 1;
                }
                TarWrite($fh, \$data);
                $size += length($data);
                #
                # The header already promised $hdr->{size} bytes, so
                # once we have truncated there is nothing more to
                # write; stop here rather than reporting (and counting)
                # the same error again for every remaining chunk.
                #
                last if ( $truncated );
            }
            $f->close;
            if ( $size != $hdr->{size} ) {
                #
                # The file came up short: pad with NULs so the payload
                # matches the size already written in the header.
                #
                print(STDERR "Error: padding $hdr->{fullPath} to $hdr->{size}"
                           . " bytes from $size bytes\n");
                $ErrorCnt++;
                while ( $size < $hdr->{size} ) {
                    my $len = $hdr->{size} - $size;
                    $len = $BufSize if ( $len > $BufSize );
                    $data = "\0" x $len;
                    TarWrite($fh, \$data);
                    $size += $len;
                }
            }
            TarWritePad($fh, $size);
        }
        $FileCnt++;
        $ByteCnt += $size;
    } elsif ( $PreV4 && $hdr->{type} == BPC_FTYPE_HARDLINK ) {
        #
        # Note: the meaning of this type changed between BackupPC <= v3.x
        # and >= 4.x.
        #
        # In 3.x a hardlink is stored like a symlink: the contents
        # of the "file" is the path to the linked-to file.
        #
        # In 4.x+ a hardlink's attributes are stored with the
        # inode, and the real attributes are stored by inode
        # for all files with nlinks >= 1.
        #
        # The 4.x case is handled above.
        #
        # TODO: do path rewrite on link path?
        #
        # Hardlink file: either write a hardlink or the complete file
        # depending upon whether the linked-to file will be written
        # to the archive.
        #
        # Start by reading the contents of the link.
        #
        my $f = BackupPC::XS::FileZIO::open($hdr->{fullPath}, 0, $hdr->{compress});
        if ( !defined($f) ) {
            print(STDERR "Unable to open file $hdr->{fullPath} (for $hdr->{name})\n");
            $ErrorCnt++;
            return;
        }
        my $data;
        while ( $f->read(\$data, $BufSize) > 0 ) {
            $hdr->{linkname} .= $data;
        }
        $f->close;
        #
        # Check @ARGV and the list of hardlinked files we have explicitly
        # dumped to see if we have dumped this file or not
        #
        my $done = 0;
        my $name = $hdr->{linkname};
        $name =~ s{^\.?/+}{/};
        $name = "/$name" if ( $name !~ m{^/} );
        if ( defined($HardLinkExtraFiles{$name}) ) {
            $done = 1;
        } else {
            foreach my $origArg ( @ARGV ) {
                #
                # Normalize a copy: the foreach variable aliases the
                # @ARGV element, and the command line arguments must
                # not be rewritten while the main loop is still
                # iterating over them.
                #
                my $arg = $origArg;
                $arg = "/" if ( $arg eq "." );
                $arg =~ s{^\.?/+}{/};
                $arg =~ s{/+$}{};
                $done = 1 if ( $name eq $arg || $name =~ /^\Q$arg\// || $arg eq "" );
            }
        }
        if ( $done ) {
            #
            # Target file will be or was written, so just remember
            # the hardlink so we can dump it later.
            #
            push(@HardLinks, $hdr);
            $SpecialCnt++;
        } else {
            #
            # Have to dump the original file.  Just call the top-level
            # routine, so that we save the hassle of dealing with
            # mangling, merging and attributes.
            #
            my $name = $hdr->{linkname};
            $name =~ s{^\./}{/};
            $HardLinkExtraFiles{$name} = $hdr->{name};
            archiveWrite($fh, $name, $hdr->{name});
        }
    } elsif ( $hdr->{type} == BPC_FTYPE_SYMLINK ) {
        #
        # Symbolic link: read the symbolic link contents into the header
        # and write the header.
        # TODO: do path rewrite on link path?
        #
        my $f = BackupPC::XS::FileZIO::open($hdr->{fullPath}, 0, $hdr->{compress});
        if ( !defined($f) ) {
            print(STDERR "Unable to open symlink file $hdr->{fullPath} (for $hdr->{name})\n");
            $ErrorCnt++;
            return;
        }
        my $data;
        while ( $f->read(\$data, $BufSize) > 0 ) {
            $hdr->{linkname} .= $data;
        }
        $f->close;
        $hdr->{size} = 0;
        TarWriteFileInfo($fh, $hdr);
        $SpecialCnt++;
    } elsif ( $hdr->{type} == BPC_FTYPE_CHARDEV
           || $hdr->{type} == BPC_FTYPE_BLOCKDEV
           || $hdr->{type} == BPC_FTYPE_FIFO ) {
        #
        # Special files: for char and block special we read the
        # major and minor numbers from a plain file.
        #
        if ( $hdr->{type} != BPC_FTYPE_FIFO ) {
            my $f = BackupPC::XS::FileZIO::open($hdr->{fullPath}, 0,
                                                $hdr->{compress});
            my $data;
            if ( !defined($f) || $f->read(\$data, $BufSize) < 0 ) {
                print(STDERR "Unable to open/read char/block special file"
                           . " $hdr->{fullPath} (for $hdr->{name})\n");
                $f->close if ( defined($f) );
                $ErrorCnt++;
                return;
            }
            $f->close;
            #
            # The stored contents are matched as "major,minor"
            #
            if ( $data =~ /(\d+),(\d+)/ ) {
                $hdr->{devmajor} = $1;
                $hdr->{devminor} = $2;
            }
        }
        $hdr->{size} = 0;
        TarWriteFileInfo($fh, $hdr);
        $SpecialCnt++;
    } elsif ( $hdr->{type} == BPC_FTYPE_SOCKET
           || $hdr->{type} == BPC_FTYPE_UNKNOWN ) {
        #
        # ignore these two file types - these are dynamic file types created
        # by applications as needed
        #
    } else {
        print(STDERR "Got unknown type $hdr->{type} for $hdr->{name}\n");
        $ErrorCnt++;
    }
}
