#!/usr/bin/env bash
# Print the embedded POD manual page for this script on stdout.
# Arguments: none.
# The here-doc delimiter is quoted so the text is emitted literally, with no
# parameter, command or arithmetic expansion.
usage() {
cat << 'ENDPOD'

=head1 NAME

    backup_nas - back up FSs on a NAS using zfs copy

=head1 SYNOPSIS

    zfs_replication_bin/backup_nas {filesystem1 filesystem2}

=head1 DESCRIPTION

Find the filesystems on the current NAS using zfs list, then back them
up to datacenter1 or datacenter2 using replication_ABC.sh

Unless names of FSs are specified on the command line, the script
attempts to identify FSs to back up from zfs list. We exclude the NAS
box's own OS and similar FSs.

The script must be run from a directory (~mohan/sysinfo) where
zfs_replication_bin and directories like {destnas}/{sourcenas}
exist. The responsibility for getting to that directory must lie
outside this script, because how each source nas mounts the homedirs
and cds to that directory is idiosyncratic. All paths referenced
within this script are relative to that directory.

Logs go in {destnas}/{sourcenas}/{fs}/backup_{fs}.log

The "tag" used by replication_ABC.sh (used to name snapshots) is
backup_{fs} . This allows the same snapshots to be used even if the
source and destination NAS boxes change in the future.

=cut

ENDPOD
}

# DEBUG: any non-empty value turns debugprint output on; defaults to empty (off).
DEBUG=${DEBUG:-}

# Print all arguments on a single line to stdout, but only when DEBUG is
# non-empty.
# Globals:   DEBUG (read)
# Arguments: the words of the message
# Returns:   0 when DEBUG is empty; otherwise the status of echo.
#
# IFS is pinned to a space locally because "$*" joins its words with the
# first character of IFS. Parts of this script run with IFS set to a newline,
# which would otherwise spread a multi-word message over several lines.
function debugprint {
    local IFS=' '
    if [ -n "$DEBUG" ]; then
        echo "$*"
    fi
}

# All paths used by this script are relative to the current directory, so
# stop immediately when the zfs_replication_bin directory is not visible
# from here.
[[ -d zfs_replication_bin ]] || {
    echo "This script must be run from ~mohan/sysinfo"
    exit -1
}

# Collapse the command-line arguments (filesystem names) into one regex
# alternation of the form "a|b|c". With no arguments the result is empty.
# Every argument is emitted followed by '|', then the final '|' is removed.
printf -v fspat '%s|' "$@"
fspat=${fspat%|}
# Leave IFS unset from here on, so default word splitting applies until IFS
# is assigned again.
unset IFS

# Identify the machine we are running on and choose the default destination.
# Hosts whose name starts with "age" get isephi=1 and replicate to
# datacenter2; every other host gets isephi=0 and the general default.
hostname=$(hostname)
case $hostname in
    age*)
        isephi=1
        destnas='datacenter2'
        ;;
    *)
        isephi=0
        # changed default to datacenter3 2026-01-29 AA
        destnas='datacenter3'
        ;;
esac

debugprint "hostname:$hostname isephi:$isephi destnas:$destnas"

# sourcenas is the leading run of letters and digits in the hostname.
# A hostname that does not start with such a character is rejected.
if ! [[ $hostname =~ ^([a-zA-Z0-9]+) ]]; then
    echo "hostname starts with strange characters"
    exit -1
fi
sourcenas=${BASH_REMATCH[1]}

# Pause, in seconds, between successive launches of replication_ABC.sh.
SLEEP=2

# dontbackup is an extended regex that is matched against each whole line of
# `zfs list` output; a line that matches anywhere is skipped. It is assembled
# here from one alternative per entry, joined with '|' and wrapped in
# parentheses.
# History: data_from_aws and vadata_backup were temporarily part of this list
# because the destination was out of space; that restriction was removed on
# 2026/02/18.
_dontbackup_parts=(
    '\.system'
    'agingadmin_20210529'
    'iocage'
    'nfs_diag'
    'vm_restore2'
    'vm_backups2'
    'tmp'
    'usr'
    'var'
    'ROOT'
    'tank/ek[xa][1-9][[:space:]]'
)
printf -v dontbackup '%s|' "${_dontbackup_parts[@]}"
dontbackup="(${dontbackup%|})"
unset _dontbackup_parts

debugprint "sourcenas:$sourcenas destnas:$destnas fspat:$fspat dontbackup:$dontbackup"

# Run a command on the current destination NAS over ssh.
# Globals:   destnas (read), SSHOPT (read), rsh (written: path of ssh)
# Arguments: the remote command and its arguments; they are joined with
#            single spaces and handed to ssh as one string.
# Returns:   the exit status of the ssh invocation.
remotecmd() {
    local IFS=' '
    local -a sshcmd
    rsh=/usr/bin/ssh
    # $rsh, ${SSHOPT} and $destnas are left unquoted on purpose: SSHOPT may
    # hold several space-separated options that must become separate words.
    sshcmd=($rsh ${SSHOPT} -o ConnectionAttempts=5 $destnas "$*")
    debugprint About to run remote cmd
    debugprint "${sshcmd[@]}"
    "${sshcmd[@]}"
}

# Main loop: one iteration per line of `zfs list` output. For each line that
# names a filesystem to back up, work out the short name, the destination
# NAS and the remote filesystem path, then start replication_ABC.sh in the
# background with its output appended to a per-filesystem log.
#
# set IFS to newline only, so that the unquoted $(zfs list) below is split
# into whole lines rather than into individual columns. IFS is not reset
# afterwards, so it stays newline-only for everything inside the loop.
IFS='
'
for line in $(zfs list)
do
    # name is the short filesystem name used for the tag and log directory.
    # By default it is the first path component after the pool; lines that
    # do not start with one of the recognised pools followed by "/" are
    # ignored.
    name=""
    if [[ $line =~ ^(e?tank.?|zroot|pool.)/([-_a-zA-Z0-9]+) ]]; then
        name=${BASH_REMATCH[2]}
    else
        continue
    fi

    # For filesystems nested under tank*/ek... or tank*/..._replication the
    # name is the component below that container instead.
    if [[ $line =~ ^tank.?/(ek[a-z0-9]+|[_a-zA-Z0-9]+_replication)/([-_a-zA-Z0-9]+) ]]; then
        name=${BASH_REMATCH[2]}
    fi
    # special exception: agedisk7a is handled under the name agedisk7
    if [ "x$name" = "xagedisk7a" ]; then
        name='agedisk7'
    fi
    # dontbackup is matched against the whole line, not just the name.
    if [[ $line =~ $dontbackup ]]; then
        debugprint skipping $name because it matches dontbackup 
        continue
    fi
    # On truenas3 some volumes are skipped because another box (ns4, per the
    # message below) is their real source.
    if [[ "x$sourcenas" = "xtruenas3" ]]; then
       if [[ $line =~ (rwadmin|grantadmin|agingadmin|hradmin|confer|confpriv) ]]; then
          debugprint skipping $name for $sourcenas because actual source for this volume is ns4.
          continue
       fi
    fi
    # When names were given on the command line, only names that match one
    # of them exactly are processed.
    if [ "x$fspat" != "x" ]; then
        if [[ $name =~ ^($fspat)$ ]]; then
            # do this one
            debugprint "backing up $name"
        else
            continue
        fi
    fi

    # fullname is the leading run of [-_/a-zA-Z0-9] characters on the line,
    # i.e. the dataset path up to the first other character. The match is
    # expected to succeed here because the pool/name pattern above already
    # matched the start of the same line; its result is not checked.
    [[ $line =~ ^([-_/a-zA-Z0-9]+) ]]
    fullname=${BASH_REMATCH[1]}

    # Defaults for this filesystem: same path locally (LFS) and remotely
    # (RFS), tag backup_<name>. SSHOPT, ZFSSENDOPT and ZFSRECVOPT can be
    # supplied through the environment; otherwise the defaults below apply.
    # SENDOPT and RECVOPT are passed on as "key=value" strings.
    LFS=$fullname
    RFS=$fullname
    TAG="backup_${name}"
    SSHOPT=${SSHOPT:-" -i /data/ssh/replication"}
    SENDOPT="zfssendopt=${ZFSSENDOPT:- -w }"
    RECVOPT="zfsrecvopt=${ZFSRECVOPT:- }"

    debugprint "before customization: name:$name TAG:$TAG SENDOPT:$SENDOPT RECVOPT:$RECVOPT"
    # Per-source customization of destnas, RFS and the send/recv options.
    # Source hosts not listed here keep the defaults chosen above.
    if [ $sourcenas = 'truenas3' ]; then
        # for now not sending large datasets to datacenter3 because
        # that will cause old snapshots to be deleted,
        # making it impossible to continue on datacenter1.
        # destnas is assigned in both branches so that a value set for one
        # filesystem does not carry over to the next iteration.
        if [[ $name =~ ^bulk ]]; then
            destnas='datacenter1'
        else
            destnas='datacenter3'
        fi
    elif [ $sourcenas = 'ns1' ]; then

        # Older equivalent, kept for reference:
        #        RFS=`echo $RFS | sed -e 's/zroot/...;`

        # How ${RFS//zroot/tank/ns1_replication} reads: in
        # ${var//pattern/replacement} the leading double slash means
        # "replace every match", the next slash separates the pattern from
        # the replacement, and any slashes after that are literal slash
        # characters in the replacement string.

        # change this? Maybe not for ns1.
        RFS=${RFS//zroot/tank/ns1_replication}

        # Perl-style equivalent, kept for reference:
        #        $RFS =~ s!zroot!tank/ns1_replication!;
    elif [ $sourcenas = 'ns4' ]; then

        # Older equivalent, kept for reference:
        #        RFS=`echo $RFS | sed -e 's/zroot/...;`

        # Same substitution form as for ns1: // replaces every match, the
        # next slash separates pattern from replacement, and later slashes
        # are literal characters in the replacement string.

        # change this? Maybe not for ns4.
        RFS=${RFS//zroot/tank/ns4_replication}

        # Perl-style equivalent, kept for reference:
        #        $RFS =~ s!zroot!tank/ns4_replication!;
    elif [ $sourcenas = 'agetruenas7' ]; then
        # tank becomes tank2
        # pool0,pool1 become eka3
        # this also takes care of agedisk7a vs agedisk7
        if [ $name = 'agebulk7' ]; then
            destnas=datacenter4
            # note RFS is still tank2/eka3/... on datacenter4 as it happens
        else
            # We must set this here because otherwise nothing sets
            # it back on the next iteration of the loop
            destnas=datacenter2
        fi
        # RFS is rebuilt from the short name, and the send/recv options are
        # replaced outright (any ZFSSENDOPT/ZFSRECVOPT from the environment
        # is ignored for this host).
        RFS="tank2/eka3/$name"
        SENDOPT="zfssendopt="
        RECVOPT="zfsrecvopt=-x encryption"
    elif [ $sourcenas = 'agetruenas1' ]; then
        # not enough space on datacenter2
        # so we created datacenter4
        # Every branch assigns destnas, so no value carries over from the
        # previous filesystem.
        if [[ $name =~ ^(eagedisk1|agebulk9|agedisk9|agedisk10)$ ]]; then
            destnas=datacenter4
            # replaces every occurrence of "tank" in the path with "tank2"
            RFS=${RFS//tank/tank2}
        elif [[ $name =~ ^(oldadmin|admin|diskless|ephi_system_backups|agedisk6|agedisk8|agebulk8)$ ]]; then
            destnas=datacenter6
        else
            destnas=datacenter2
        fi
    elif [ $sourcenas = 'mail5' ]; then
        RFS=${RFS//tank/tank/mail_replication}
        debugprint " on mail5 RFS is now $RFS"
        SENDOPT="zfssendopt="
        RECVOPT="zfsrecvopt=-x encryption"
    elif [ $sourcenas = 'freenasgenetics' ]; then
        RFS=${RFS//tank/tank/genetics_replication}
        SENDOPT="zfssendopt="
        RECVOPT="zfsrecvopt=-x encryption"
        debugprint " genetics replication RFS is now $RFS"
    fi

    # Logs live under <destnas>/<sourcenas>/<name>, relative to the current
    # directory.
    DIR="$destnas/$sourcenas";
    LOGDIR="$DIR/$name"
    # LOCKDIR is seen by replication_ABC and it puts lock files there
    export LOCKDIR=$DIR

    debugprint "name:$name fullname:$fullname LOGDIR:$LOGDIR"

    # mkdir is used without -p, so $DIR itself must already exist.
    # NOTE(review): confirm whether this is relied on as a guard against
    # replicating to a destination that has not been set up.
    test -d $LOGDIR || mkdir $LOGDIR
    LOGFILE="$LOGDIR/backup_${name}.log"

    # Optional recovery step, enabled by a non-empty FORCE_RECV_A: when the
    # last 10 lines of the existing log show that a resumed receive was
    # refused because the destination was modified, run `zfs recv -A` on the
    # destination first (presumably to discard the saved partial-receive
    # state; see zfs-receive(8)).
    if [ "$FORCE_RECV_A" -a -e "$LOGFILE" ]; then
        if tail -10 $LOGFILE | grep -q "cannot receive resume stream: destination $RFS has been modified"; then
            remotecmd zfs recv -A $RFS
        fi
    fi

    # Start the replication in the background, appending stdout and stderr
    # to the log, then pause before moving on to the next filesystem. The
    # script does not wait for the background jobs.
    debugprint "About to run replication cmd:"
    debugprint "./zfs_replication_bin/replication_ABC.sh  ${LFS}   ${destnas}.nber.org   ${RFS}   ${TAG} \"${SSHOPT}\" \"${SENDOPT}\" \"${RECVOPT}\"  >> $LOGFILE 2>&1 &"
    # indentation here is to line up above print cmd with actual executed cmd
                ./zfs_replication_bin/replication_ABC.sh "${LFS}" "${destnas}.nber.org" "${RFS}" "${TAG}" "${SSHOPT}"   "${SENDOPT}"   "${RECVOPT}"   >> $LOGFILE 2>&1 &
    sleep $SLEEP

done

