#!/usr/bin/perl -T

# $Id: rsyncd_prepost 1607 2011-12-16 19:18:36Z jhealy $

=head1 NAME

rsyncd_prepost E<lt>rsyncd.confE<gt> [module name]

=head1 SYNOPSIS

Add this script as the exec for the B<pre-xfer exec> and
B<post-xfer exec> options in C<rsyncd.conf>.

Or, run the two-argument version to initialize the directory storage
for a given rsync module.

=head1 ABSTRACT

A pre/post script for rsync that performs automatic rotation of
backups to keep a history of incremental transfers.

=head1 DESCRIPTION

The complete documentation for this package is located on our
documentation site.  Please see the following page for a complete
description and instructions:

L<http://web.suffieldacademy.org/ils/netadmin/docs/software/rsync_snapshot/>

=cut

use warnings;
use strict;

# strftime to format date strings
use POSIX qw(strftime floor);

# direct syslogging (default, unless a logfile is specified)
use Unix::Syslog qw(:macros :subs);

# default log file to write to before per-module logging is enabled.
# note that once a module config has been parsed, logging will switch
# to a per-module log file, so this log is only for high-level config
# errors
my $LOG = undef;

# default syslog params
openlog 'rsyncd_prepost', 0, LOG_FTP;

# Logging severity; higher levels mean more output.  -1 means no output.
my $LOG_SEVERITY = 7;


###
### Configuration variables (can be overridden in per-module config file)
###

=head1 CONFIGURATION VARIABLES

This script tries to derive as much as it can from the supplied
C<rsyncd.conf> file, using the native key-value pairs specified in the
file.  However, some behaviors require additional configuration, which
can be specified by inserting special comments directly into the
C<rsyncd.conf> file.

Any comment line beginning with the special string:

  #rsyncd_prepost: KEY=VALUE

will be read by this script, and the value assigned to the given key.
Legal key/value pairs for this configuration option are specified
below:

=cut


=head2 $USER and $GROUP

The UID and GID that the backup directories should belong to.  The
script inherits the native C<rsyncd.conf> values I<uid> and I<gid>.
Numeric or textual ids are supported.

Note that this does I<not> change the effective UID or GID of the
script.  The script always runs as the user who started the rsyncd
daemon.  These variables simply affect the default permissions on the
directories created by the script.

=cut

my $USER  = 0;
my $GROUP = 0;


=head2 $SNAPSHOT_TYPE

  #rsyncd_prepost: snapshottype=<type>

The script can produce snapshots through different methods, depending
on the capabilities of the underlying [file]system.  By default, the
script uses hardlink, as this should work across multiple operating
and file systems.

If C<hardlink> is used, the script will assume that all transfers will
be hardlinked to the previous "complete" backup.  Clients must provide
the C<--link-dest /complete> argument, or they will receive a
configuration error.

If a copy-on-write snapshot type (such as C<zfs>) is used, the script
will automatically create a copy-on-write (COW) snapshot of the backup
directory immediately after the transfer completes.  No linking is
necessary, so clients specifying C<--link-dest> will receive an error
message.

=cut

my $SNAPSHOT_TYPE = 'hardlink';


=head2 $DATE_FORMAT

  #rsyncd_prepost: dateformat=<strftime format>

By specifying a strftime(3) date format string as the value, the
'dateformat' key specifies how directories will be named once they are
snapshotted.  The default includes the unix timestamp, year, month,
day, hour, minute, and second.

=cut

my $DATE_FORMAT = '%s_%b_%d_%Y_%H-%M-%S';


=head2 $DATE_ROUND

  #rsyncd_prepost: dateround=<seconds>

Amount (in seconds) to round modification times when culling
directories.  You should set this value to the expected interval
between backup runs.  For example, if you round by 86400, all time
comparisons for previous backups will be rounded to midnight on the
day they occur.  This allows comparisons between backups to allow for
different ending times based on how long the transfers took.

=cut

my $DATE_ROUND = 86400;


=head2 $PARTIAL_MAXAGE

  #rsyncd_prepost: partialmaxage=<seconds>

If a transfer is interrupted, the partial backup is preserved in case
you need the contents.  This variable defines the maximum age (in
seconds) that a partial transfer will be kept before it's deleted to
save space.

=cut

my $PARTIAL_MAXAGE = 14 * 86400;


=head2 %SNAPSHOT_PRESERVE

  #rsyncd_prepost: snapshotpreserve=<age> <spacing>

This is a I<multi-valued> configuration option.  Because it changes
values stored in a hash, multiple 'snapshotpreserve' lines can appear
in the config file, each affecting a different I<age> key.

This hash contains "time since current backup" (I<age>) as its keys,
and "time since last snapshot" (I<spacing>) as its values.  This
allows the script to preserve snapshots based on how old they are and
how long it's been since the last preservation.  Times are given in
seconds.

If -1 is used for the I<spacing> parameter, no snapshots from that
time are kept (this gives you a way to delete old backups completely
once they reach a certain age).

Larger-valued keys override smaller-valued ones.  In this way, you can
specify a preservation scheme that keeps several recent backups, but
slowly prunes them off as they get older.

The default configuration has the following effect:

=over 2

=item Keep all snapshots if they're less than 2 weeks old

=item Only keep weekly snapshots for snapshots 2 weeks to 2 months old

=item Only keep monthly snapshots for snapshots 2 months to 12 months old

=item Only keep yearly snapshots for snapshots 1 year or older

=back

B<Note:> if you specify I<any> value for snapshotpreserve, you must
specify I<all> values you wish to use.  In other words, as soon as you
specify one key/value pair, all the defaults are erased.  So if you
want to change just one of the three default pairs, you'll need to
re-specify all of them (see below).

Specifying this default configuration in the C<rsyncd.conf> file would
look like this:

  #rsyncd_prepost: snapshotpreserve=1209600 604800
  #rsyncd_prepost: snapshotpreserve=5184000 2592000
  #rsyncd_prepost: snapshotpreserve=31556926 31556926

1209600 is 2 weeks, 604800 is 1 week, 5184000 is 60 days, 2592000 is
30 days, and 31556926 is one year.

=cut

my %SNAPSHOT_PRESERVE = (
    # Preserve all snapshots by default

    (14*86400) => (7*86400),    # If snapshot is over 2 weeks old, only
                                # keep it if it's spaced a week from the
                                # last preserved snapshot

    (60*86400) => (30*86400),   # If snapshot is over 2 months old, only
                                # keep it if it's spaced a month from the
                                # last preserved snapshot

    (31556926) => (31556926)    # If snapshot is over a year old, only
                                # keep it if it's spaced a year from the
                                # last preserved snapshot
);


###
### Base structure directory names (don't need to edit unless you
### hate the names we've chosen for the directories)
###

# directory to hold incomplete transfers
my $PARTIAL = 'partial';

# directory to hold archived complete transfers
# ZFS forces you to use .zfs/snapshot, so if zfs is the $SNAPSHOT_TYPE
# that will be the value automatically
my $SNAPSHOT = 'snapshot';

# directory to hold expired transfers marked for deletion
my $TRASH = 'trash';

# directory to hold in-progress and most-recent transfers
my $RSYNC = 'rsync';

# directory holding the last complete transfer
my $COMPLETE = 'complete';

# directory that is the target of the current in-progress transfer
my $TRANSFER = 'transfer';


#########################################################################
#   End of user customization (should not need to edit below here)     #
#########################################################################


###
### Parse Environment Variables
###

# Clean up the path
$ENV{'PATH'} = join(':', (
    '/usr/local/sbin',
    '/usr/local/bin',
    '/opt/local/sbin',
    '/opt/local/bin',
    '/usr/sbin',
    '/usr/bin',
    '/sbin',
    '/bin'
));

# Read in the environment variables from rsync (see rsyncd.conf(5))
# Because we're running in taint mode, untaint variables along the way.
# Each value is only accepted if it matches a restrictive pattern; the
# untainted copy is taken from the regex capture.

my $RSYNC_MODULE_NAME = undef;
if (defined($ENV{'RSYNC_MODULE_NAME'}) &&
    $ENV{'RSYNC_MODULE_NAME'} =~ /^([^\/\]]+)$/) {
    $RSYNC_MODULE_NAME = $1;
}

# $BASE is the module path with its trailing "/rsync" component removed
my $RSYNC_MODULE_PATH = undef;
my $BASE = undef;
if (defined($ENV{'RSYNC_MODULE_PATH'}) &&
    $ENV{'RSYNC_MODULE_PATH'} =~ /^(\/.*)$/) {
    $RSYNC_MODULE_PATH = $1;
    if ($RSYNC_MODULE_PATH =~ /^(\/.*)\/rsync\/?$/) {
        $BASE = $1;
    }
}

# Accept dotted IPv4 as well as IPv6 (hex digits and colons) addresses
my $RSYNC_HOST_ADDR = undef;
if (defined($ENV{'RSYNC_HOST_ADDR'}) &&
    $ENV{'RSYNC_HOST_ADDR'} =~ /^([0-9a-fA-F:.]+)$/) {
    $RSYNC_HOST_ADDR = $1;
}

my $RSYNC_HOST_NAME = undef;
if (defined($ENV{'RSYNC_HOST_NAME'}) &&
    $ENV{'RSYNC_HOST_NAME'} =~ /^([a-zA-Z0-9.-]+)$/) {
    $RSYNC_HOST_NAME = $1;
}

my $RSYNC_USER_NAME = '*ANONYMOUS*';
if (defined($ENV{'RSYNC_USER_NAME'}) &&
    $ENV{'RSYNC_USER_NAME'} =~ /^([a-zA-Z0-9_-]+)$/) {
    $RSYNC_USER_NAME = $1;
}

my $RSYNC_PID = undef;
if (defined($ENV{'RSYNC_PID'}) &&
    $ENV{'RSYNC_PID'} =~ /^(\d+)$/) {
    $RSYNC_PID = $1;
}

# The request must begin with the module name.  The name is quoted so
# that regex metacharacters in it are matched literally; an undefined
# name degrades to an empty prefix instead of raising a warning.
my $RSYNC_REQUEST = undef;  # pre-xfer only
my $module_prefix = defined($RSYNC_MODULE_NAME)
    ? quotemeta($RSYNC_MODULE_NAME)
    : '';
if (defined($ENV{'RSYNC_REQUEST'}) &&
    $ENV{'RSYNC_REQUEST'} =~ /^($module_prefix.*)$/) {
    $RSYNC_REQUEST = $1;
}

# Collect RSYNC_ARG0, RSYNC_ARG1, ... until the first missing index.
# NOTE: these values are copied as-is (still tainted); the code visible
# here only compares them against expected strings.
my @RSYNC_ARGS = ();  # pre-xfer only
if (defined($ENV{'RSYNC_ARG0'}) && $ENV{'RSYNC_ARG0'} eq 'rsyncd') {
    my $i = 0;
    while (defined($ENV{"RSYNC_ARG$i"})) {
        push(@RSYNC_ARGS, $ENV{"RSYNC_ARG$i"});
        $i++;
    }
}

my $RSYNC_EXIT_STATUS = undef;  # post-xfer only
if (defined($ENV{'RSYNC_EXIT_STATUS'}) &&
    $ENV{'RSYNC_EXIT_STATUS'} =~ /^([\d-]+)$/) {
    $RSYNC_EXIT_STATUS = $1;
}

my $RSYNC_RAW_STATUS = undef;  # post-xfer only
if (defined($ENV{'RSYNC_RAW_STATUS'}) &&
    $ENV{'RSYNC_RAW_STATUS'} =~ /^([\d-]+)$/) {
    $RSYNC_RAW_STATUS = $1;
}


###
### Subroutines
###

=head1 METHODS

=cut


=head2 logit($message, $severity) [returns I<void>]

If the global severity level is set at least as high as $severity, log
the given $message to the currently open log file.

=over 2

=item C<$message> (I<string> [B<required>])

The message to log

=item C<$severity> (I<integer> [B<optional>])

The severity for this message

=back

=cut

sub logit($;$) {
    my $message  = $_[0];
    my $severity = defined($_[1]) ? $_[1] : 2;

    # normalize the message so it ends in exactly one newline
    chomp $message;
    $message .= "\n";

    # messages above the configured verbosity are dropped
    return if $severity > $LOG_SEVERITY;

    my $fmt = "(%15s) %s@%s: %s\n";

    # any of the rsync-provided values may be missing (for example when
    # the environment failed validation); substitute an empty string so
    # formatting does not emit "uninitialized value" warnings
    my @arg = map { defined($_) ? $_ : '' }
        ($RSYNC_HOST_ADDR, $RSYNC_USER_NAME, $RSYNC_MODULE_NAME, $message);

    if (!defined($RSYNC_PID)) {
        # default to system err when run from the console
        print STDERR "$message\n";
    }
    elsif (defined($LOG)) {
        printf $LOG "%19s $fmt",
            strftime('%Y/%m/%d %H:%M:%S', localtime), @arg;
    }
    else {
        # default to syslog
        syslog $severity, $fmt, @arg;
    }

    return;
} # end sub logit


=head2 killit($message, $severity) [returns I<void>]

Logs the given message, just as in logit(), but also exits with an
error status of 1 (terminating any transfer).

See logit() for parameters and other information.

=cut

sub killit($;$) {
    my $message  = $_[0];
    my $severity = defined($_[1]) ? $_[1] : 2;

    logit($message, $severity);
    exit 1;
} # end sub killit


# Map of rsyncd.conf 'syslog facility' names to the Unix::Syslog macro
# that provides the facility value.  Code references are stored (rather
# than the values) so that only the macro actually requested is ever
# evaluated.
my %SYSLOG_FACILITY = (
    auth     => \&LOG_AUTH,
    authpriv => \&LOG_AUTHPRIV,
    cron     => \&LOG_CRON,
    daemon   => \&LOG_DAEMON,
    ftp      => \&LOG_FTP,
    kern     => \&LOG_KERN,
    lpr      => \&LOG_LPR,
    mail     => \&LOG_MAIL,
    news     => \&LOG_NEWS,
    syslog   => \&LOG_SYSLOG,
    user     => \&LOG_USER,
    uucp     => \&LOG_UUCP,
    local0   => \&LOG_LOCAL0,
    local1   => \&LOG_LOCAL1,
    local2   => \&LOG_LOCAL2,
    local3   => \&LOG_LOCAL3,
    local4   => \&LOG_LOCAL4,
    local5   => \&LOG_LOCAL5,
    local6   => \&LOG_LOCAL6,
    local7   => \&LOG_LOCAL7,
);

# Re-open syslog using the named facility, falling back to 'ftp' (with a
# warning) when the name is not recognized.
sub _openSyslogFacility {
    my ($name) = @_;

    logit("Changing syslog facility to: '$name'", 7);

    my $macro = $SYSLOG_FACILITY{$name};
    if (!defined($macro)) {
        logit("Unknown logging facility '$name'; using 'ftp'", 4);
        $macro = \&LOG_FTP;
    }

    openlog 'rsyncd_prepost', 0, $macro->();
    return;
}

# Apply one "#rsyncd_prepost: KEY=VALUE" setting.  snapshotpreserve pairs
# are accumulated in the hash referenced by $snapshot so the caller can
# replace the defaults in one step.
sub _applyPrepostOption {
    my ($key, $val, $snapshot) = @_;

    if ($key eq 'snapshottype') {
        logit("Changing snapshottype to: '$val'", 7);
        $SNAPSHOT_TYPE = $val;
    }
    elsif ($key eq 'dateformat') {
        logit("Changing date format to: '$val'", 7);
        $DATE_FORMAT = $val;
    }
    elsif ($key eq 'dateround') {
        if ($val =~ /^(\d+)$/) {
            $DATE_ROUND = $1;
            logit("Changing date rounding to: '$val'", 7);
        }
        else {
            logit("Unparseable 'dateround' value: '$val'", 4);
        }
    }
    elsif ($key eq 'partialmaxage') {
        if ($val =~ /^(\d+)$/) {
            $PARTIAL_MAXAGE = $1;
            logit("Changing partial maxage to: '$val'", 7);
        }
        else {
            logit("Unparseable 'partialmaxage' value: '$val'", 4);
        }
    }
    elsif ($key eq 'snapshotpreserve') {
        if ($val =~ /(\d+)\s+(-?\d+)/) {
            my ($age, $spacing) = ($1, $2);
            logit("Setting snapshot preserve '$age' to '$spacing'", 7);
            $snapshot->{$age} = $spacing;
        }
        else {
            logit("Unparseable 'snapshotpreserve' value: '$val'", 4);
        }
    }
    else {
        logit("Unknown rsyncd_prepost key: '$key'", 4);
    }

    return;
}

# Apply one native rsyncd.conf "key = value" setting that this script
# cares about.  Keys not listed here are silently ignored.
sub _applyRsyncOption {
    my ($key, $val) = @_;

    if ($key eq 'syslogfacility') {
        _openSyslogFacility($val);
    }
    elsif ($key eq 'logfile') {
        logit("Changing logging from syslog to file '$val'", 6);
        # open into a temporary handle so that $LOG is only switched
        # when the open succeeded; otherwise logging stays on syslog
        my $fh;
        if (open($fh, '>>', $val)) {
            $LOG = $fh;
        }
        else {
            logit("Couldn't open log '$val': $!", 3);
        }
    }
    elsif ($key eq 'uid') {
        # anything other than digits/dash is treated as a user name
        my $uid = ($val =~ /[^\d-]/) ? getpwnam($val) : $val;
        if (defined($uid)) {
            $USER = $uid;
            logit("Changing user to: '$val' ($USER)", 7);
        }
        else {
            logit("Unknown user '$val'; keeping uid '$USER'", 4);
        }
    }
    elsif ($key eq 'gid') {
        # anything other than digits/dash is treated as a group name
        my $gid = ($val =~ /[^\d-]/) ? getgrnam($val) : $val;
        if (defined($gid)) {
            $GROUP = $gid;
            logit("Changing group to: '$val' ($GROUP)", 7);
        }
        else {
            logit("Unknown group '$val'; keeping gid '$GROUP'", 4);
        }
    }
    elsif ($key eq 'path') {
        if (!defined($RSYNC_PID)) {
            # not started by rsync: assign the path from the config file
            if ($val =~ /^(\/.*)$/) {
                $RSYNC_MODULE_PATH = $1;
                if ($RSYNC_MODULE_PATH =~ /^(\/.*)\/rsync\/?$/) {
                    $BASE = $1;
                }
            }
        }
        elsif (!defined($RSYNC_MODULE_PATH) || $RSYNC_MODULE_PATH ne $val) {
            # NOTE(review): the message announces an abort, but this only
            # logs and continues; confirm whether killit() was intended.
            my $reported = defined($RSYNC_MODULE_PATH) ? $RSYNC_MODULE_PATH : '';
            logit("Sanity error: configured module path\n" .
                  "$reported\ndiffers from reported path\n" .
                  " $val\n" .
                  "ABORTING TRANSFER");
        }
    }

    return;
}


=head2 readConfig() [returns I<void>]

Parses the rsyncd.conf file specified as the first argument to the
script (if provided).  Overrides any defaults in the program with the
values specified in the config file.

The config file can have native rsync arguments, but also special
comments of the form:

  # rsyncd_prepost: KEY=VALUE

Which will be read by this routine.  This allows you to specify
options for both rsyncd and this script in the same config file (which
is convenient because they share some of the same options).

If a file is named but cannot be read, an error is logged and the
script exits with status 1.

=cut

sub readConfig() {
    my $conf = $ARGV[0];

    if (!defined($conf)) {
        logit("No configuration file specified; using defaults", 5);
        return;
    }

    if (! -r $conf) {
        logit("Configuration file '$conf' not readable. Terminating transfer", 3);
        exit 1;
    }

    # The config file parsing can be in one of several phases:
    #   Global        (options before a named section)
    #   Other Section (options in a named section that is not the current module)
    #   My Section    (options in a named section matching the current module)
    # $mysec is 1 for Global and My Section, 0 for Other Section.
    my $mysec = 1;

    # track snapshot times in a temporary hash
    my %snapshot = ();

    # keep track of long lines that are continued with a backslash
    my $multi = '';

    my $config;
    if (!open($config, '<', $conf)) {
        logit("Unable to read rsyncd.conf file '$conf': $!. Terminating", 3);
        exit 1;
    }

    while (my $line = <$config>) {
        chomp $line;

        # strip any leading whitespace or comment markers; assigning from
        # the captures also untaints the line
        if ($line =~ /^\s*#[\s#]*(.*)$/) {
            if ($multi eq '') {
                # a fresh comment line: normalize to a single '#'
                $line = "#$1";
            }
            elsif (substr($multi, 0, 1) eq '#') {
                # continuation of a comment: append the text only
                $line = $1;
            }
            else {
                # a comment in the middle of a continued setting: drop it
                $line = '';
            }
        }
        elsif ($line =~ /^\s*(.*)$/) {
            $line = $1;
        }

        # include any data from previous continuation lines
        $line  = $multi . $line;
        $multi = '';

        # check for multi-line configs, and accumulate as necessary
        if ($line =~ /^(.*)\\\s*$/) {
            $multi = $1;
            next;
        }

        # parse line
        if ($line =~ /^\[\s*(.*\S)\s*\]/) {
            # section header
            my $section = $1;
            $section =~ s/\s//g;
            $mysec = (defined($RSYNC_MODULE_NAME)
                      && $section eq $RSYNC_MODULE_NAME) ? 1 : 0;
        }
        elsif ($mysec == 0) {
            # ignore if we're not in our named section or global
            next;
        }
        elsif ($line =~ /^\s*$/) {
            # empty or only whitespace (ignore)
        }
        elsif ($line =~ /^\s*#\s*rsyncd_prepost:\s*([^=]*\S)\s*=\s*(.*\S)\s*$/) {
            # config comment
            my ($key, $val) = ($1, $2);
            $key =~ s/\s//g;
            _applyPrepostOption($key, $val, \%snapshot);
        }
        elsif ($line =~ /^#/) {
            # generic comment (ignore)
            next;
        }
        elsif ($line =~ /^([^=]*\S)\s*=\s*(.*\S)\s*$/) {
            # rsync config var
            my ($key, $val) = ($1, $2);
            $key =~ s/\s//g;
            _applyRsyncOption($key, $val);
        }
        else {
            logit("Unrecognized config file line: '$line'", 3);
        }
    } # end while

    close($config);

    # only swap SNAPSHOT_PRESERVE if something was defined
    if (%snapshot) {
        %SNAPSHOT_PRESERVE = %snapshot;
    }

    # ZFS overrides the snapshot dir (mandatory location)
    if ($SNAPSHOT_TYPE eq 'zfs') {
        $SNAPSHOT = '.zfs/snapshot';
    }

    return;
} # end sub readConfig


=head2 argSet($arg) [returns I<boolean>]

Returns 1 if the given option was passed to rsync as an argument.

This function accepts two kinds of arguments to search for: I<flag>
and I<full>.  A I<flag> arg starts with a single leading dash ("-"),
and will match a flag argument specified anywhere in a longer string
of flags.  A I<full> argument will only match a single argument if it
matches completely.

For example, the I<full> argument "--link-dest" will only match if
there is a single argument with that exact name.
Meanwhile, the I<flag> argument "-n" will match both a single argument
that is exactly "-n", but also the argument "-vnlHogDt", because it is
a flag concatenated together with others.

=over 2

=item C<$arg> (I<string> [B<required>])

The argument to search for

=back

=cut

sub argSet($;) {
    my $arg  = $_[0];
    my $flag = undef;   # the single flag letter, when searching for a flag

    logit("Searching for argument '$arg'", 7);

    # check to see if this is a single-letter flag argument
    if ($arg =~ /^-([^-])$/) {
        $flag = $1;
        logit("Identified single-letter flag '$flag'", 7);
    }

    for my $candidate (@RSYNC_ARGS) {
        if (defined($flag)) {
            # only arguments starting with ONE dash are flag strings; look
            # for the letter literally (no regex interpretation) in them
            if ($candidate =~ /^-([^-]+)$/ && index($1, $flag) >= 0) {
                return 1;
            }
        }
        elsif ($candidate eq $arg) {
            # otherwise, just do an exact whole-string match
            return 1;
        }
    }

    return 0;
} # end sub argSet


=head2 mtime(C<$file>) [returns I<integer>]

Given a file (or directory) name, this method gets the last-modified
time of the file and returns it.  Returns undef if the file cannot be
stat'ed.

=over 2

=item C<$file> (I<string> [B<required>])

The path to the file or directory to use as the source of the modify time

=back

=cut

sub mtime($;) {
    my $file = $_[0];

    # stat returns an empty list on failure; element 9 is the mtime
    my @info = stat($file);

    return @info ? $info[9] : undef;
} # end sub mtime


=head2 mtimeName(C<$file>) [returns I<string>]

Given a file (or directory) name, this method gets the last-modified
time of the file and returns a date string formatted by $DATE_FORMAT.
Returns undef if the modification time cannot be determined.

=over 2

=item C<$file> (I<string> [B<required>])

The path to the file or directory to use as the source of the date string

=back

=cut

sub mtimeName($;) {
    my $file  = $_[0];
    my $mtime = mtime($file);

    if (!defined($mtime)) {
        return undef;
    }

    return strftime($DATE_FORMAT, localtime($mtime));
} # end sub mtimeName


=head2 timeRound(C<$time>) [returns I<integer>]

Given a time value in seconds-since-epoch (such as an mtime or time()
value), returns the time value rounded to the next-lowest $DATE_ROUND
value.
For example, if $DATE_ROUND is 86400, any time value provided will be rounded to midnight on that day. =over 2 =item C