#!/usr/bin/perl -T

# $Id: rsyncd_prepost 1607 2011-12-16 19:18:36Z jhealy $

=head1 NAME

rsyncd_prepost <rsyncd.conf> [module name]

=head1 SYNOPSIS

Add this script as the exec for the B<pre-xfer> and B<post-xfer> options
in C<rsyncd.conf>.  Or, run the two-argument version to initialize the
directory storage for a given rsync module.

=head1 ABSTRACT

A pre/post script for rsync that performs automatic rotation of backups
to keep a history of incremental tranfers.

=head1 DESCRIPTION

The complete documentation for this package is located on our
documentation site.  Please see the following page for a complete
description and instructions:

L<http:E<sol>E<sol>web.suffieldacademy.orgE<sol>ilsE<sol>netadminE<sol>docsE<sol>softwareE<sol>rsync_snapshotE<sol>>

=cut


use warnings;
use strict;

# strftime to format date strings
use POSIX qw(strftime floor);

# direct syslogging (default, unless a logfile is specified)
use Unix::Syslog qw(:macros :subs);


# default log file to write to before per-module logging is enabled.
# note that once a module config has been parsed, logging will switch
# to a per-module log file, so this log is only for high-level config
# errors
my $LOG = undef;

# default syslog params
openlog 'rsyncd_prepost', 0, LOG_FTP;

# Logging severity; higher levels mean more output.  -1 means no output.
my $LOG_SEVERITY = 7;


###
### Configuration variables (can be overridden in per-module config file)
###


=head1 CONFIGURATION VARIABLES

This script tries to derive as much as it can from the supplied
C<rsyncd.conf> file, using the native key-value pairs specified in the
file.

However, some behaviors require additional configuration, which can be
specified by inserting special comments directly into the
C<rsyncd.conf> file.  Any comment line beginning with the special
string:

  #rsyncd_prepost: KEY=VALUE

will be read by this script, and the value assigned to the given key.
Legal key/value pairs for this configuration option are specified
below:

=cut

=head2 $USER and $GROUP

The UID and GID that the backup directories should belong to.  The
script inherits the native C<rsyncd.conf> values I<uid> and I<gid>.
Numeric or textual ids are supported.

Note that this does I<not> change the effective UID or GID of the
script.  The script always runs as the user who started the rsyncd
daemon.  These variables simply affect the default permissions on the
directories created by the script.

=cut
my $USER = 0;
my $GROUP = 0;


=head2 $SNAPSHOT_TYPE

  #rsyncd_prepost: snapshottype=<hardlink|btrfs|zfs>

The script can produce snapshots through different methods, depending
on the capabilities of the underlying [file]system.  By default, the
script uses hardlink, as this should work across multiple operating and
file systems.

If C<hardlink> is used, the script will assume that all transfers will
be hardlinked to the previous "complete" backup.  Clients must provide
the C<--link-dest /complete> argument, or they will receive a
configuration error.

If C<btrfs> or C<zfs> are used, the script will automatically create a
copy-on-write (COW) snapshot of the backup directory immediately after
the transfer completes.  No linking is necessary, so clients
specifiying C<--link-dest> will receive an error message.

=cut
my $SNAPSHOT_TYPE = 'hardlink';


=head2 $DATE_FORMAT

  #rsyncd_prepost: dateformat=<string>

By specifying a strftime(3) date format string as the value, the
'dateformat' key specifies how directories will be named once they are
snapshotted.  The default includes the unix timestamp, year, month,
day, hour, minute, and second.

=cut
my $DATE_FORMAT = '%s_%b_%d_%Y_%H-%M-%S';


=head2 $DATE_ROUND

  #rsyncd_prepost: dateround=<int>

Amount (in seconds) to round modification times when culling directories.
You should set this value to the expected interval between backup runs.
For example, if you round by 86400, all time comparisons for previous
backups will be rounded to midnight on the day they occur.  This allows
comparisons between backups to allow for different ending times based
on how long the transfers took.

=cut
my $DATE_ROUND = 86400;


=head2 $PARTIAL_MAXAGE

  #rsyncd_prepost: partialmaxage=<int>

If a transfer is interrupted, the partial backup is preserved in case
you need the contents.  This variable defines the maximum age (in seconds)
that a partial transfer will be kept before it's deleted to save space.

=cut
my $PARTIAL_MAXAGE = 14 * 86400;


=head2 %SNAPSHOT_PRESERVE

  #rsyncd_prepost: snapshotpreserve=<age:int> <distance:int>

This is a I<multiple-line> configuration option.  Because it changes
values stored in a hash, multiple 'snapshotpreserve' lines can appear
in the config file, each affecting a different I<age> key.

This hash contains "time since current backup" (I<age>) as its keys,
and "time since last snaphot" (I<distance>) as its values.  This
allows the script to preserve snapshots based on how old they are and
how long its been since the last preservation.  Times are given in
seconds.  If -1 is used for the I<distance> parameter, no snapshots
from that time are kept (this gives you a way to delete old backups
completely once they reach a certain age).

Larger-valued keys override smaller-valued ones.  In this way, you
can specify a preservation scheme that keeps several recent backups,
but slowly prunes them off as they get older.  The default configuration
has the following effect:

=over 2

=item Keep all snapshots if they're less than 2 weeks old

=item Only keep weekly snapshots for snapshots 2 weeks to 2 months old

=item Only keep monthly snapshots for snapshots 2 months to 12 months old

=item Only keep yearly snapshots for snapshots 1 year or older

=back

B<NOTE:> if you specifiy I<any> value for snapshotpreserve, you must
specify I<all> values you wish to use.  In other words, as soon as you
specify one key/value pair, all the defaults are erased.  So if you
want to change just one of the three default pairs, you'll need to
re-specify all of them (see below).

Specifying this default configuration in the C<rsyncd.conf> file would
look like this:

  #rsyncd_prepost: snapshotpreserve=1209600 604800
  #rsyncd_prepost: snapshotpreserve=5184000 2592000
  #rsyncd_prepost: snapshotpreserve=31556926 31556926

1209600 is 2 weeks, 604800 is 1 week, 5184000 is 60 days, 2592000 is
30 days, and 31556926 is one year.

=cut
my %SNAPSHOT_PRESERVE =
  (                          # Preserve all snapshots by default

   (14*86400) => (7*86400),  # If snapshot is over 2 weeks old, only
                             # keep it if it's spaced a week from the
                             # last preserved snapshot

   (60*86400) => (30*86400), # If snapshot is over 2 months old, only
                             # keep it if it's spaced a month from the
                             # last preserved snapshot

   (31556926) => (31556926)  # If snapshot is over a year old, only
                             # keep it if it's spaced a year from the
                             # last preserved snapshot
   );



###
### Base structure directory names (don't need to edit unless you
### hate the names we've chosen for the directories)
###

# directory to hold incomplete transfers
my $PARTIAL = 'partial';

# directory to hold archived complete transfers
# ZFS forces you to use .zfs/snapshot, so if zfs is the $SNAPSHOT_TYPE
# that will be the value automatically
my $SNAPSHOT = 'snapshot';

# directory to hold expired transfers marked for deletion
my $TRASH = 'trash';

# directory to hold in-progress and most-recent transfers
my $RSYNC = 'rsync';

# directory holding the last complete transfer
my $COMPLETE = 'complete';

# directory that is the target of the current in-progress transfer
my $TRANSFER = 'transfer';



#########################################################################
#     End of user customization (should not need to edit below here)    #
#########################################################################


###
### Parse Environment Variables
###

# Clean up the path
$ENV{'PATH'} = join(':', (
			  '/usr/local/sbin',
			  '/usr/local/bin',
			  '/opt/local/sbin',
			  '/opt/local/bin',
			  '/usr/sbin',
			  '/usr/bin',
			  '/sbin',
			  '/bin'
			 ));


# Read in the environment variables from rsync (see rsyncd.conf(5))
# Because we're running in taint mode, untaint variables along the way.
my $RSYNC_MODULE_NAME = undef;
if (defined($ENV{'RSYNC_MODULE_NAME'}) &&
    $ENV{'RSYNC_MODULE_NAME'} =~ /^([^\/\]]+)$/) {
  $RSYNC_MODULE_NAME = $1;
}


my $RSYNC_MODULE_PATH = undef;
my $BASE = undef;
if (defined($ENV{'RSYNC_MODULE_PATH'}) &&
    $ENV{'RSYNC_MODULE_PATH'} =~ /^(\/.*)$/) {
  $RSYNC_MODULE_PATH = $1;
  if ($RSYNC_MODULE_PATH =~ /^(\/.*)\/rsync\/?$/) {
    $BASE = $1;
  }
}


my $RSYNC_HOST_ADDR = undef;
if (defined($ENV{'RSYNC_HOST_ADDR'})
    && $ENV{'RSYNC_HOST_ADDR'} =~ /^([\d.]+)$/) {
  $RSYNC_HOST_ADDR = $1;
}


my $RSYNC_HOST_NAME = undef;
if (defined($ENV{'RSYNC_HOST_NAME'}) &&
    $ENV{'RSYNC_HOST_NAME'} =~ /^([a-zA-Z0-9.-]+)$/) {
  $RSYNC_HOST_NAME = $1;
}


my $RSYNC_USER_NAME = '*ANONYMOUS*';
if (defined($ENV{'RSYNC_USER_NAME'})
    && $ENV{'RSYNC_USER_NAME'} =~ /^([a-zA-Z0-9_-]+)$/) {
  $RSYNC_USER_NAME = $1;
}


my $RSYNC_PID = undef;
if (defined($ENV{'RSYNC_PID'}) && $ENV{'RSYNC_PID'} =~ /^(\d+)$/) {
  $RSYNC_PID = $1;
}


my $RSYNC_REQUEST = undef; # pre-xfer only
if (defined($ENV{'RSYNC_REQUEST'}) &&
    $ENV{'RSYNC_REQUEST'} =~ /^($RSYNC_MODULE_NAME.*)$/) {
  $RSYNC_REQUEST = $1;
}

my @RSYNC_ARGS = (); # pre-xfer only
if (defined($ENV{'RSYNC_ARG0'}) && $ENV{'RSYNC_ARG0'} eq 'rsyncd') {
  my $i = 0;
  while (defined($ENV{"RSYNC_ARG$i"})) {
    push(@RSYNC_ARGS, $ENV{"RSYNC_ARG$i"});
    $i++;
  }
}

my $RSYNC_EXIT_STATUS = undef; # post-xfer only
if (defined($ENV{'RSYNC_EXIT_STATUS'}) &&
    $ENV{'RSYNC_EXIT_STATUS'} =~ /^([\d-]+)$/) {
  $RSYNC_EXIT_STATUS = $1;
}


my $RSYNC_RAW_STATUS = undef; # post-xfer only
if (defined($ENV{'RSYNC_RAW_STATUS'}) &&
    $ENV{'RSYNC_RAW_STATUS'} =~ /^([\d-]+)$/) {
  $RSYNC_RAW_STATUS = $1;
}


###
### Subroutines
###


=head1 METHODS

=cut



=head2 logit($message, $severity) [returns I<void>]

If the global severity level is set at least as high as $severity,
log the given $message to the currently open log file.

=over 2

=item C<message> (I<string> [B<required>])

The message to log

=item C<severity> (I<severity> [B<optional; defaults to 0>])

The severity for this message

=back

=cut
sub logit($;$) {
  my $message = $_[0];
  my $severity = exists($_[1]) ? $_[1] : 2;

  chomp $message;
  $message .= "\n";

  unless ($severity > $LOG_SEVERITY) {
    my $fmt = "(%15s) %s@%s: %s\n";
    my @arg = ($RSYNC_HOST_ADDR, $RSYNC_USER_NAME, $RSYNC_MODULE_NAME, $message);

    # default to system err when run from the console
    if (!defined($RSYNC_PID)) {
      print STDERR "$message\n";
    }
    elsif (defined($LOG)) {
      printf $LOG "%19s $fmt", strftime('%Y/%m/%d %H:%M:%S', localtime), @arg;
    }
    else { # default to syslog
      syslog $severity, $fmt, @arg;
    }
  }

} # end sub logit



=head2 killit($message, $severity) [returns I<void>]

Logs the given message, just as in logit(), but also exits with an
error status of 1 (terminating any transfer).

See logit() for parameters and other information.

=cut
sub killit($;$) {
  my $message = $_[0];
  my $severity = exists($_[1]) ? $_[1] : 2;

  logit($message, $severity);

  exit 1;

} # end sub killit



=head2 readConfig() [returns I<void>]

Parses the rsyncd.conf file specified as the first argument to the script
(if provided).  Overrides any defaults in the program with the values
specified in the config file.

The config file can have native rsync arguments, but also special comments
of the form:

  # rsyncd_prepost: KEY=VALUE

Which will be read by this routine.  This allows you to specify options for
both rsyncd and this script in the same config file (which is convienent
because they share some of the same options).

=cut
sub readConfig() {

  my $conf = $ARGV[0];

  if (defined($conf)) {
    if (! -r $conf) {
      logit("Configuration file '$conf' not readable.  Terminating transfer", 3);
      exit 1;
    }
  }
  else {
    logit("No configuration file specified; using defaults", 5);
    return;
  }

  # The config file parsing can be in one of several phases:
  #   Global (options before a named section)
  #   Other Section (options in a named section that is not the current module)
  #   My Section (options in a named section matching the current module)
  my $mysec = 1;

  # track snapshot times in a temporary hash
  my %snapshot = ();

  # keep track of long lines that are continued with a backslash
  my $multi = '';

  if (open(CONFIG, $conf)) {
    while (my $line = <CONFIG>) {

      chomp $line;

      # strip any leading whitespace or comment markers
      if ($line =~ /^\s*#[\s#]*(.*)$/) {
	if ($multi eq '') {
	  $line = "#$1";
	}
	elsif (substr($multi, 0, 1) eq '#') {
	  $line = $1;
	}
	else {
	  $line = '';
	}
      }
      elsif ($line =~ /^\s*(.*)$/) {
	$line = $1;
      }
      # end whitespace strip

      # include any data from a previous continuation lines
      $line = $multi . $line;
      $multi = '';

      # check for multi-line configs, and accumulate as necessary
      if ($line =~ /^(.*)\\\s*$/) {
	$multi = $1;
	next;
      }

      # parse line
      if ($line =~ /^\[\s*(.*\S)\s*\]/) {
	# section
	my $section = $1;
	$section =~ s/\s//g;

	if ($section eq $RSYNC_MODULE_NAME) {
	  $mysec = 1;
	}
	else {
	  $mysec = 0;
	}
      }
      elsif ($mysec == 0) {
	# ignore if we're not in our named section or global
	next;
      }
      elsif ($line eq '' || $line =~ /^\s*$/) {
	# only whitespace (ignore)
      }
      elsif ($line =~ /^\s*#\s*rsyncd_prepost:\s*(.*\S)\s*=\s*(.*\S)\s*$/) {
	# config comment
	my $key = $1;
	my $val = $2;
	$key =~ s/\s//g;

 	if ($key eq 'snapshottype') {
	  logit("Changing snapshottype to: '$val'", 7);
 	  $SNAPSHOT_TYPE=$val;
 	}
 	elsif ($key eq 'dateformat') {
	  logit("Changing date format to: '$val'", 7);
 	  $DATE_FORMAT=$val;
 	}
 	elsif ($key eq 'dateround') {
	  if ($val =~ /^(\d+)$/) {
	    logit("Changing date rounding to: '$val'", 7);
	    $DATE_ROUND=$1;
	  }
	  else {
	    logit("Unparseable 'dateround' value: '$val'", 4);
	  }
 	}
 	elsif ($key eq 'partialmaxage') {
	  if ($val =~ /^(\d+)$/) {
	    logit("Changing partial maxage to: '$val'", 7);
	    $PARTIAL_MAXAGE=$1;
	  }
	  else {
	    logit("Unparseable 'partialmaxage' value: '$val'", 4);
	  }
 	}
 	elsif ($key eq 'snapshotpreserve') {
	  if ($val =~ /(\d+)\s+(-?\d+)/) {
	    logit("Setting snapshot preserve '$1' to '$2'", 7);
	    $snapshot{$1} = $2;
	  }
	  else {
	    logit("Unparseable 'snapshotpreserve' value: '$val'", 4);
	  }
 	}
 	else {
 	  logit("Unknown rsyncd_prepost key: '$key'", 4);
 	}
      }
      elsif ($line =~ /^#/) {
	# generic comment (ignore)
	next;
      }
      elsif ($line =~ /^([^=]*\S)\s*=\s*(.*\S)\s*$/) {
	# rsync config var
	my $key = $1;
	my $val = $2;
	$key =~ s/\s//g;

	if ($key eq 'syslogfacility') {
	  logit("Changing syslog facility to: '$val'", 7);

	  if ($val eq 'auth') {
	    openlog 'rsyncd_prepost', 0, LOG_AUTH;
	  }
	  elsif ($val eq 'authpriv') {
	    openlog 'rsyncd_prepost', 0, LOG_AUTHPRIV;
	  }
	  elsif ($val eq 'cron') {
	    openlog 'rsyncd_prepost', 0, LOG_CRON;
	  }
	  elsif ($val eq 'daemon') {
	    openlog 'rsyncd_prepost', 0, LOG_DAEMON;
	  }
	  elsif ($val eq 'ftp') {
	    openlog 'rsyncd_prepost', 0, LOG_FTP;
	  }
	  elsif ($val eq 'kern') {
	    openlog 'rsyncd_prepost', 0, LOG_KERN;
	  }
	  elsif ($val eq 'lpr') {
	    openlog 'rsyncd_prepost', 0, LOG_LPR;
	  }
	  elsif ($val eq 'mail') {
	    openlog 'rsyncd_prepost', 0, LOG_MAIL;
	  }
	  elsif ($val eq 'news') {
	    openlog 'rsyncd_prepost', 0, LOG_NEWS;
	  }
	  elsif ($val eq 'syslog') {
	    openlog 'rsyncd_prepost', 0, LOG_SYSLOG;
	  }
	  elsif ($val eq 'user') {
	    openlog 'rsyncd_prepost', 0, LOG_USER;
	  }
	  elsif ($val eq 'uucp') {
	    openlog 'rsyncd_prepost', 0, LOG_UUCP;
	  }
	  elsif ($val eq 'local0') {
	    openlog 'rsyncd_prepost', 0, LOG_LOCAL0;
	  }
	  elsif ($val eq 'local1') {
	    openlog 'rsyncd_prepost', 0, LOG_LOCAL1;
	  }
	  elsif ($val eq 'local2') {
	    openlog 'rsyncd_prepost', 0, LOG_LOCAL2;
	  }
	  elsif ($val eq 'local3') {
	    openlog 'rsyncd_prepost', 0, LOG_LOCAL3;
	  }
	  elsif ($val eq 'local4') {
	    openlog 'rsyncd_prepost', 0, LOG_LOCAL4;
	  }
	  elsif ($val eq 'local5') {
	    openlog 'rsyncd_prepost', 0, LOG_LOCAL5;
	  }
	  elsif ($val eq 'local6') {
	    openlog 'rsyncd_prepost', 0, LOG_LOCAL6;
	  }
	  elsif ($val eq 'local7') {
	    openlog 'rsyncd_prepost', 0, LOG_LOCAL7;
	  }
	  else {
	    logit("Unknown logging facility '$val'; using 'ftp'", 4);
	    openlog 'rsyncd_prepost', 0, LOG_FTP;
	  }
	} # end syslog facility
	elsif ($key eq 'logfile') {
	  logit("Changing logging from syslog to file '$val'", 6);
	  open ($LOG, ">>$val") or logit("Couldn't open log '$val': $!", 3);
	}
	elsif ($key eq 'uid') {
	  if ($val =~ /[^\d-]/) {
	    $USER = getpwnam($val);
	  }
	  else {
	    $USER = $val;
	  }

	  logit("Changing user to: '$val' ($USER)", 7);
	}
	elsif ($key eq 'gid') {
	  if ($val =~ /[^\d-]/) {
	    $GROUP = getgrnam($val);
	  }
	  else {
	    $GROUP = $val;
	  }

	  logit("Changing group to: '$val' ($GROUP)", 7);
	}
	elsif ($key eq 'path') {
	  if (!defined($RSYNC_PID)) {
	    # assign the path from the config file
	    if ($val =~ /^(\/.*)$/) {
	      $RSYNC_MODULE_PATH = $1;
	      if ($RSYNC_MODULE_PATH =~ /^(\/.*)\/rsync\/?$/) {
		$BASE = $1;
	      }
	    }
	  }
	  elsif ($RSYNC_MODULE_PATH ne $val) {
	    logit("Sanity error: configured module path\n" .
		  "$RSYNC_MODULE_PATH\ndiffers from reported path\n" .
		  "  $val\n" .
		  "ABORTING TRANSFER");
	  }
	}
      } # end rsyncd config option
      else {
	logit("Unrecognized config file line: '$line'", 3);
      } # end parse

    } # end while

    # only swap SNAPSHOT_PRESERVE if something was defined
    if (%snapshot) {
      %SNAPSHOT_PRESERVE = %snapshot;
    }

    # ZFS overrides the snapshot dir (mandatory location)
    if ($SNAPSHOT_TYPE eq 'zfs') {
      $SNAPSHOT = '.zfs/snapshot';
    }
  }
  else {
    logit("Unable to read rsyncd.conf file '$conf': $!.  Terminating", 3);
    exit 1;
  }

} # end sub readConfig



=head2 argSet($arg) [returns I<boolean>]

Returns 1 if the given option was passed to rsync as an argument.

This function accepts two kinds of arguments to search for: I<full>
and I<short>.  A I<short> arg starts with a single leading dash ("-"),
and will match a flag argument specified anywhere in a longer string
of flags.  A I<full> argument will only match a single argument if it
matches completely.

For example, the I<full> argument "--link-dest" will only match if
there is a single argument with that exact name.  Meanwhile, the
I<short> argument "-n" will match both a single argument that is
exactly "-n", but also the argument "-vnlHogDt", because it is a flag
concatenated together with others.

=over 2

=item C<arg> (I<string> [B<required>])

The argument to search for

=back

=cut
sub argSet($;) {
  my $arg = $_[0];

  my $flag = 0;
  my $found = 0;

  logit("Searching for argument '$arg'", 7);

  # check to see if this is a single-letter flag argument
  if ($arg =~ /^-(.)$/) {
    $flag = 1;
    $arg = $1;
    logit("Identified single-letter flag '$arg'", 7);
  }

  for my $a (@RSYNC_ARGS) {
    # if we're searching for a flag, and the arg starts with ONE dash...
    if ($flag && $a =~ /^-([^-]+)$/) {
      # see if the letter we're searching for appears in the string of flags
      if ($1 =~ /$arg/) {
	$found = 1;
      }
    }
    # otherwise, just do an exact whole-string match
    elsif ($a eq $arg) {
      $found = 1;
    }

    last if $found;
  }

  return $found;

} # end sub argSet


=head2 mtime(C<$file>) [returns I<int>]

Given a file (or directory) name, this method gets the last-modified time
of the file and returns it.

=over 2

=item C<file> (I<string> [B<required>])

The path to the file or directory to use as the source of the modify time

=back

=cut
sub mtime($;) {
  my $file = $_[0];

  my $mtime = undef;

  if (stat($file)) {
    my @s = stat(_);
    $mtime = $s[9];
  }

  return $mtime;

} # end sub mtime


=head2 mtimeName(C<$file>) [returns I<string>]

Given a file (or directory) name, this method gets the last-modified time
of the file and returns a date string formatted by $DATE_FORMAT.

=over 2

=item C<file> (I<string> [B<required>])

The path to the file or directory to use as the source of the date string

=back

=cut
sub mtimeName($;) {
  my $file = $_[0];

  my $mtime = mtime($file);

  if (defined($mtime)) {
    return strftime($DATE_FORMAT, localtime($mtime));
  }

  return undef;

} # end sub mtimeName


=head2 timeRound(C<$tile>) [returns I<int>]

Given a time value in seconds-since-epoch (such as an mtime or time() value),
returns the time value rounded to the next-lowest $DATE_ROUND value.

For example, if $DATE_ROUND is 86400, any time value provided will be rounded
to midnight on that day.

=over 2

=item C<time> (I<int> [B<required>])

A time value in seconds-since-epoch, such as that returned by L<time()>

=back

=cut
sub timeRound($;) {

  my $time = $_[0];

  if (defined($time) && $time =~ /^-?\d+$/) {
    my $rtime = undef;

    if (defined($time)) {
      $rtime = floor($time / $DATE_ROUND) * $DATE_ROUND;
    }

    return $rtime;
  }

  logit("Non-numeric time '$time' passed to timeRound", 3);
  return undef;

} # end sub timeRound


=head2 checkDestDir() [returns I<void>]

Checks the incoming rsync request to ensure it is trying to write to
an appropriate directory.  If it isn't, this method terminates the script
with an error to prevent any writing.

=cut
sub checkDestDir() {

  unless ($RSYNC_REQUEST eq "$RSYNC_MODULE_NAME/$TRANSFER/") {
    logit("No rsync target of /$TRANSFER/ found; disallowing transfer", 3);
    exit 1;
  }

  my $linkdestSet = 0;

  # check to make sure the hard-link parameter is set
  for my $i (0 .. $#RSYNC_ARGS) {
    $linkdestSet = 1 if ($RSYNC_ARGS[$i] eq '--link-dest' &&
			 $RSYNC_ARGS[$i+1] eq "/$COMPLETE");
  }

  # now check the presence/absence of the link-dest flag against
  # what we're expecting
  if ($linkdestSet==0 && $SNAPSHOT_TYPE eq 'hardlink') {
    logit("No --link-dest /$COMPLETE arg found, but server snapshottype is 'hardlink'; disallowing transfer", 3);
    exit 1;
  }
  elsif ($linkdestSet==1 && ($SNAPSHOT_TYPE eq 'btrfs' || $SNAPSHOT_TYPE eq 'zfs')) {
    logit("Spurious --link-dest /$COMPLETE arg found when server snapshottype is '$SNAPSHOT_TYPE'; disallowing transfer", 3);
    exit 1;
  }

} # end sub checkDestDir



=head2 ensureBaseStructure() [returns I<void>]

Checks for an existing module structure, creating one if necessary.

=cut
sub ensureBaseStructure() {

  if (! -e $BASE) {
    logit("Base dir does not exist; creating one for you", 6);
    if ($SNAPSHOT_TYPE eq 'zfs') {
      system('zfs', 'create',
	     '-o', 'atime=off',
	     '-o', 'compression=on',
	     '-o', 'snapdir=visible',
	     substr($BASE, 1));
      chmod(0755, $BASE);
    }
    else {
      mkdir("$BASE", 0755) or killit("Could not create $BASE: $!", 3);
    }
  }

  if (! -e "$BASE/$PARTIAL") {
    logit("Partial dir does not exist; creating one for you", 6);
    mkdir("$BASE/$PARTIAL", 0755) or
      killit("Could not create $BASE/$PARTIAL: $!", 3);
  }

  if (! -e "$BASE/$SNAPSHOT") {
    logit("Snapshot dir does not exist; creating one for you", 6);
    mkdir("$BASE/$SNAPSHOT", 0755) or
      killit("Could not create $BASE/$SNAPSHOT: $!", 3);
  }

  if (! -e "$BASE/$TRASH") {
    logit("Trash dir does not exist; creating one for you", 6);
    mkdir("$BASE/$TRASH", 0755) or killit("Could not create $BASE/$TRASH: $!", 3);
  }

  if (! -e "$BASE/$RSYNC") {
    logit("Rsync dir does not exist; creating one for you", 6);
    mkdir("$BASE/$RSYNC", 0770) or killit("Could not create $BASE/$RSYNC: $!", 3);
    chown($USER, $GROUP, "$BASE/$RSYNC") or
      logit("Unable to change ownership of '$BASE/$RSYNC' " .
	    "to $USER:$GROUP", 3);
  }

  if ($SNAPSHOT_TYPE eq 'hardlink' && ! -e "$BASE/$RSYNC/$COMPLETE") {
    logit("Complete dir does not exist; creating one with pre-epoch date", 6);
    mkdir("$BASE/$RSYNC/$COMPLETE", 0770) or
      killit("Could not create $BASE/$RSYNC/$COMPLETE: $!", 3);
    chown($USER, $GROUP, "$BASE/$RSYNC/$COMPLETE") or
      killit("Unable to change ownership of '$BASE/$RSYNC/$COMPLETE' " .
	     "to $USER:$GROUP", 3);
    utime(-1, -1, "$BASE/$RSYNC/$COMPLETE");
  }

} # end sub ensureBaseStructure


=head2 ensureTransferDir() [returns I<void>]

Checks for existing files in the "transfer" dir, allowing them to
remain if the rsync "--ignore-existing" flag has been set.  Otherwise,
move it out of the way and creates a new empty dir for incoming
backup.

=cut
sub ensureTransferDir() {

  my $transfer = "$BASE/$RSYNC/$TRANSFER";

  if ($SNAPSHOT_TYPE eq 'hardlink' && -e $transfer) {
    logit("Found existing transfer directory", 7);
    if (argSet('--ignore-existing')) {
      logit("Leaving transfer in place and resuming existing transfer", 6);
    }
    else {
      my $name = mtimeName($transfer);
      logit("Moving partial transfer to $name", 6);
      rename($transfer, "$BASE/$PARTIAL/$name");
    }
  }

  if (! -e $transfer) {
    logit("Creating new transfer directory", 7);

    if ($SNAPSHOT_TYPE eq 'hardlink' || $SNAPSHOT_TYPE eq 'zfs') {
      mkdir($transfer, 0770); # u+rwx,g+rwx,o-rwx
    }
    elsif ($SNAPSHOT_TYPE eq 'btrfs') {
      system('btrfs', 'subvolume', 'create', $transfer);
      chmod(0770, $transfer);
    }

    if (chown($USER, $GROUP, $transfer) < 1) {
      killit("Unable to change ownership of '$transfer' to $USER:$GROUP", 3);
    }
  }

} # end sub ensureTransferDir


=head2 cullPartialDirs() [returns I<void>]

Looks in the "partial" directory and moves any dirs whose age exceeds
$PARTIAL_MAXAGE to the "trash" folder.

=cut
sub cullPartialDirs() {
  if (opendir(PARTIAL, "$BASE/$PARTIAL")) {
    while (defined(my $file = readdir(PARTIAL))) {
      if ($file ne '.' && $file ne '..' &&
	  -d "$BASE/$PARTIAL/$file" && $file =~ m#^([^/]+)$#) { # must untaint

	$file = $1;
	my $mtime = mtime("$BASE/$PARTIAL/$file");
	if (time - $mtime > $PARTIAL_MAXAGE) {
	  logit("Removing expired partial dir '$file'", 6);
	  if ($SNAPSHOT_TYPE eq 'hardlink') {
	    rename("$BASE/$PARTIAL/$file", "$BASE/$TRASH/$file" .
		   "_partial_deleted_at_" . strftime($DATE_FORMAT, localtime));
	  }
	  elsif ($SNAPSHOT_TYPE eq 'btrfs') {
	    symlink("$BASE/$PARTIAL/$file", "$BASE/$TRASH/$file" .
		    "_partial_deleted_at_" . strftime($DATE_FORMAT, localtime));
	  }
	  elsif ($SNAPSHOT_TYPE eq 'zfs') { # just move the symlink
	    rename("$BASE/$PARTIAL/$file", "$BASE/$TRASH/$file" .
		    "_partial_deleted_at_" . strftime($DATE_FORMAT, localtime));
	  }
	}
      }
    }
    closedir(PARTIAL);
  }
  else {
    logit("Couldn't read partial directory '$BASE/$PARTIAL': $!", 3);
  }
} # end sub cullPartialDirs


=head2 snapshotComplete() [returns I<boolean>]

Archive the current "complete" directory to the "snapshot" directory,
renaming it based on its mtime.  Returns 1 if the rename succeeded, or
0 otherwise.

For non-hardlink snapshotting, this method doesn't do anything (the
snapshot step takes place in the "transferComplete()" subroutine).

=cut
sub snapshotComplete() {

  if ($SNAPSHOT_TYPE eq 'hardlink') {
    my $mname = mtimeName("$BASE/$RSYNC/$COMPLETE");
    logit("Hardlink snapshotting '$COMPLETE' dir", 6);

    if (!rename("$BASE/$RSYNC/$COMPLETE", "$BASE/$SNAPSHOT/$mname")) {
      killit("Could not snapshot '$COMPLETE' to $mname: $!", 2);
    }
  }

  return 1;
} # end sub snapshotComplete


=head2 transferComplete() [returns I<void>]

Timestamp the transfer directory to the current time, so we know when it
finished.

Rename the just-completed transfer from "transfer" to "complete" for
hardlink snapshots.  This method will not overwrite an existing
"complete" folder and will log an error.

For non-hardlink snapshots, the transfer directory has a new snapshot made.

=cut
sub transferComplete() {

  # undef for both args means "current"
  utime(undef, undef, "$BASE/$RSYNC/$TRANSFER");

  if ($SNAPSHOT_TYPE eq 'hardlink') {
    if (-e "$BASE/$RSYNC/$COMPLETE") {
      logit("Can't move '$TRANSFER'; '$COMPLETE' still exists!", 3);
    }
    else {
      if (rename("$BASE/$RSYNC/$TRANSFER", "$BASE/$RSYNC/$COMPLETE")) {
	return 1;
      }

      killit("Unable to rename '$TRANSFER' to '$COMPLETE': $!", 2);
    }
  }
  # for snapshotting filesystems, we move $TRANSFER (not $COMPLETE)
  # becuase there's no need to set up the link-dest for next time like
  # we do for hardlink
  elsif ($SNAPSHOT_TYPE eq 'btrfs') {
    my $mname = mtimeName("$BASE/$RSYNC/$TRANSFER");
    logit("btrfs snapshotting '$TRANSFER' dir", 6);

    # if sucessful transfer from this time period already exists,
    # add a nonce to our name to make it unique
    if (-e "$BASE/$SNAPSHOT/$mname") {
      my $nonce = 1;
      while (-e "$BASE/$SNAPSHOT/${mname}_${nonce}") {
	$nonce++;
      }
      $mname .= '_' . $nonce;
    }

    if (system('btrfs', 'subvolume', 'snapshot',
	       "$BASE/$RSYNC/$TRANSFER", "$BASE/$SNAPSHOT/$mname") == 0) {
      return 1;
    }

    killit("Could not snapshot '$TRANSFER' to $mname: btrfs subvol snapshot failed: $?", 2);
  }
  elsif ($SNAPSHOT_TYPE eq 'zfs') {
    my $mname = mtimeName("$BASE/$RSYNC/$TRANSFER");
    logit("zfs snapshotting '$TRANSFER' dir", 6);

    # jhealy: not necessary; we'll just check the .zfs/snapshot dir directly
    # my %snaps = ();
    #
    # # get the list of current snapshots
    # if (open (ZFSLIST, '-|', 'zfs', 'list', '-H', '-r', '-t', 'snapshot', $BASE)) {
    #   while (my $line=<ZFSLIST>) {
    # 	chomp $line;
    # 	my @stats = split("\t", $line);
    # 	$snaps{substr($stats[0], length($BASE))} = $stats[0];
    #   }
    #   close(ZFSLIST);
    # }
    # else {
    #   logit("zfs could not list exsiting snapshots: $!",1);
    # }

    # if sucessful transfer from this time period already exists,
    # add a nonce to our name to make it unique
    if (-e "$BASE/$SNAPSHOT/$mname") {
      my $nonce = 1;
      while (-e "$BASE/$SNAPSHOT/${mname}_${nonce}") {
	$nonce++;
      }
      $mname .= '_' . $nonce;
    }

    if (system('zfs', 'snapshot', substr($BASE, 1) . '@' . $mname) == 0) {
      return 1;
    }

    killit("Could not snapshot '$TRANSFER' to $mname: zfs snapshot failed: $?", 2);
  }

} # end sub transferComplete


=head2 cullSnapshotDirs() [returns I<void>]

Iterates over the snapshot backups and removes those that are no longer
needed according to the retention policy set in %SNAPSHOT_PRESERVE.

=cut
sub cullSnapshotDirs() {

  my $current = mtime("$BASE/$RSYNC/$TRANSFER") ||
    mtime("$BASE/$RSYNC/$COMPLETE") || time;

  # find all the directories already marked for deletion so we can skip them
  my %trashed = ();

  if (opendir(TRASH, "$BASE/$TRASH")) {
    while (defined(my $file = readdir(TRASH))) {
      if (-l "$BASE/$TRASH/$file" && $file =~ m#^([^/]+)$#) { # must untaint

	$file = $1;

	# remember directories we've already trashed
	my $target = readlink("$BASE/$TRASH/$file");
	$trashed{$target} = 1 if ($target ne '');
      }
    }

    closedir(TRASH);
  }

  my %dirs = ();

  if (opendir(SNAPSHOT, "$BASE/$SNAPSHOT")) {

    # get all possible directories
    while (defined(my $file = readdir(SNAPSHOT))) {

      if ($file ne '.' && $file ne '..' &&
	  -d "$BASE/$SNAPSHOT/$file" && $file =~ m#^([^/]+)$#) { # must untaint

	$file = $1;

	next if ($file =~ 'partial'); # don't count partial transfers
	next if ($trashed{"$BASE/$SNAPSHOT/$file"}); # skip trashed items

	# otherwise, we should consider it

	if ($SNAPSHOT_TYPE eq 'zfs') {
	  # ZFS snapshots the entire $BASE tree, not just the
	  # transfer folder.  Thus, the mtime of the snapshot
	  # is NOT the mtime of the rsync transfer, but of the
	  # base folder itself.  We must dive into the snapshot
	  # to grab the mtime of the transfer.

	  $dirs{mtime("$BASE/$SNAPSHOT/$file/$RSYNC/$TRANSFER")} = $file;

	}
	else {
	  $dirs{mtime("$BASE/$SNAPSHOT/$file")} = $file;
	}
      }

    }

    closedir(SNAPSHOT);

    # keep track of oldest backup (32-bit MIN_INT)
    my $last = -(2**31);

    # iterate over the directories, oldest first
    for my $age (sort {$a <=> $b} keys %dirs) {
      my $dir = $dirs{$age};

      my $cDistance = (timeRound($current) - timeRound($age));
      my $lDistance = (timeRound($age) - timeRound($last));

      # calculate unrounded distances for debugging
      my $cRaw = $current - $age;
      my $lRaw = $age - $last;

      my $min = 0;

      logit("Checking snapshot '$dir', $cDistance/$cRaw older than current, " .
	    "$lDistance/$lRaw younger than last", 7);

      # find the retention policy that matches this age
      for my $preserve (sort {$b <=> $a} keys %SNAPSHOT_PRESERVE) {
	if ($cDistance > $preserve) {
	  $min = $SNAPSHOT_PRESERVE{$preserve};
	  logit("$dir matched $preserve rule, min distance is $min", 7);
	  last; # bail out; we've found a match
	}
      }

      # finally, check to see if our age is greater than the distance
      # specified by the preservation rules
      if ($min < 0 || $lDistance < $min) {
	logit("$dir distance $lDistance/$lRaw is under $min; removing", 7);
	logit("Removing expired snapshot dir '$dir'", 6);
	  if ($SNAPSHOT_TYPE eq 'hardlink') {
	    rename("$BASE/$SNAPSHOT/$dir", "$BASE/$TRASH/$dir" .
		   "_deleted_at_" . strftime($DATE_FORMAT, localtime));
	  }
	  elsif ($SNAPSHOT_TYPE eq 'btrfs' || $SNAPSHOT_TYPE eq 'zfs') {
	    symlink("$BASE/$SNAPSHOT/$dir", "$BASE/$TRASH/$dir" .
		    "_deleted_at_" . strftime($DATE_FORMAT, localtime));
	  }
      }
      else {
	# remember this as the new "most recent" snapshot for comparisons
	$last = $age;
	logit("$dir distance $lDistance/$lRaw is over $min; keeping", 7);
      }

    } # end foreach age/dir
  } # end if opendir
  else {
    logit("Couldn't read snapshot directory '$BASE/$SNAPSHOT': $!", 3);
  }

} # end sub cullSnapshotDirs



###
### Main
###

if (!defined($RSYNC_PID)) {

  if (defined($ARGV[1]) && $ARGV[1] =~ /^([^\/\]]+)$/) {
    $RSYNC_MODULE_NAME = $1;
    logit("User requested module creation for module '$RSYNC_MODULE_NAME'", 0);
    readConfig();
    ensureBaseStructure();
    exit 0;
  }
  else {
    killit(<<'EOE', 0);

rsyncd_prepost <rsyncd.conf> [modulename]

  This script is usually called as a pre- or post-xfer script for
  rsyncd, as defined in the rsyncd.conf(5) man page.  For more information,
  please see the package documentation at:

  http://web.suffieldacademy.org/ils/netadmin/docs/software/rsync_snapshot/

  If the optional [modulename] is specified, this script will read the provided
  configuration file and attempt to create the base directories on the
  filesystem for this module (used for first-time setup).

EOE
  }
}

# first, do no harm: don't change anything if --dry-run is requested
# Our only choice is to terminate the transfer, because --dry-run does not
# get passed as an argument to the post-xfer step.  Thus, there's no way
# for the cleanup portion of the script to know that the transfer was bogus,
# and should not be snapshotted.  We just die early and with a prominent
# error message so the user knows that "--dry-run" isn't supported.
if (argSet('-n') || argSet('--dry-run')) {
  killit("Sender requested --dry-run (-n), which is NOT SUPPORTED by this script.  Aborting transfer", 0);
}

readConfig();

if (defined($RSYNC_REQUEST)) {
  logit("Found RSYNC_REQUEST; processing as pre-xfer", 7);

  eval {
    checkDestDir();

    ensureBaseStructure();

    ensureTransferDir();

    cullPartialDirs();

    logit("pre-xfer tasks complete", 7);
  };

  # exception, so handle it and then die
  if ($@) {
    logit("rsyncd_prepost ABNORMAL TERMINATION: $@", 0);
    exit 1;
  }
}
elsif (defined($RSYNC_EXIT_STATUS)) {
  logit("Found RSYNC_EXIT_STATUS; processing as post-xfer", 7);

  # Error codes:
  # 23     Partial transfer due to error
  # 24     Partial transfer due to vanished source files
  #
  # These can happen with bad ACLs or with files that disappeared
  # during the sync (which can happen since it's not a frozen snapshot).
  # To prevent the whole transfer from being marked as no good, we log
  # a warning but let the transfer complete successfully

  eval {

    if ($RSYNC_EXIT_STATUS == 23) {

      logit("rsync exited with error '$RSYNC_EXIT_STATUS' (raw '$RSYNC_RAW_STATUS')", 3);
      logit("Error likely caused by bad ACLs, so allowing transfer to complete sucessfully", 3);

      $RSYNC_EXIT_STATUS = 0;
    }
    elsif ($RSYNC_EXIT_STATUS == 24) {

      logit("rsync exited with error '$RSYNC_EXIT_STATUS' (raw '$RSYNC_RAW_STATUS')", 3);
      logit("Error likely caused by vanishing source files, so allowing transfer to complete sucessfully", 3);

      $RSYNC_EXIT_STATUS = 0;
    }

    if ($RSYNC_EXIT_STATUS) {
      logit("rsync exited with error '$RSYNC_EXIT_STATUS' (raw '$RSYNC_RAW_STATUS')", 3);
      # don't touch anything (leave old backups, snapshots, and logs in place)

      # if using BTRFS, snapshot this as a "partial" transfer
      if ($SNAPSHOT_TYPE eq 'btrfs') {
	my $transfer = "$BASE/$RSYNC/$TRANSFER";
	my $name = mtimeName($transfer);
	logit("Snapshotting partial transfer to $name", 6);
	system('btrfs', 'subvolume', 'snapshot',
	       $transfer, "$BASE/$PARTIAL/$name");
      }
      # if using ZFS, snapshot this as a "partial" transfer
      if ($SNAPSHOT_TYPE eq 'zfs') {
	my $name = 'partial_' . mtimeName("$BASE/$RSYNC/$TRANSFER");
	logit("Snapshotting partial transfer to $name", 6);
	system('zfs', 'snapshot', substr($BASE, 1) . '@' . $name);
	symlink("$BASE/$SNAPSHOT/$name", "$BASE/$PARTIAL/$name");
      }
    }
    else {
      # transfer completed successfully
      logit("Transfer completed sucessfully", 6);

      # move "complete" to snapshot
      if (snapshotComplete()) {
	transferComplete();

	cullSnapshotDirs();
      }
    }

    logit("post-xfer tasks complete", 7);
  };

  # exception, so handle it and then die
  if ($@) {
    logit("rsyncd_prepost ABNORMAL TERMINATION: $@", 0);
    exit 1;
  }
}


=head1 SEE ALSO

L<http:E<sol>E<sol>web.suffieldacademy.orgE<sol>ilsE<sol>netadminE<sol>docsE<sol>softwareE<sol>rsync_snapshotE<sol>>

=head1 AUTHOR

Jason Healy E<lt>jhealy@suffieldacademy.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2011 by Jason Healy

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut
