#!/usr/bin/env perl =head1 NAME users_over_quota =head1 SYNOPSIS users_over_quota [-w] [/homedirs/more] ... [/homedirs/last] =head1 ABSTRACT Takes a base directory containing user home directories on a system and produces a list of rsync exclude filters as output to prevent backing up files over quota. Optionally, it will e-mail users to warn them if they are over quota. =head1 DESCRIPTION The C<-w> flag specifies whether to warn users via e-mail if they are over quota. Otherwise, all other arguments are interpreted as base dirs containing user home directories. For example, C and C might be used as the paths that contain user home directories. As output, the script produces a list of rsync filters that exclude any files that put the user over quota. For example, if user "jbogus" has a "Movies" folder in his home directory that puts him over quota, the script would output - /path/to/homes/jbogus/Movies/ So that rsync will ignore this directory. The script works on I directories inside the home directory only, so if a single large file is embedded deep inside a folder, the entire folder's contents will be excluded. Directories are excluded starting with the largest, then the next largest, etc. until the total size of the non-excluded directories is under quota. Apple's "dscl" utility is used to look up home directories to find the user quota information. If a home directory is not found in the directory system, no excludes are generated (that is, the directory will be backed up in full). If you still wish to enforce a maximum size for directories that do not have an owner, set the "DEFAULT_QUOTA" variable. =cut # Report errors well use strict; use warnings; # deliver warnings via e-mail use Net::SMTP; =head1 CONFIGURATION VARIABLES =cut my $WARN = 0; my $SMTP = Net::SMTP->new('smtp.suffieldacademy.org'); my $EMAIL_FROM = 'netadmin@suffieldacademy.org'; my $EMAIL_FROM_NAME = 'Network Administrator'; my $EMAIL_TO_DEFAULT = 'netadmin@suffieldacademy.org'; # 1GiB my $DEFAULT_QUOTA = 1073741824; my $NFS_PREFIX = '/Network/Servers/tessa.suffieldacademy.org'; # Any path matching any of the regular expressions in this array will # NOT be counted towards a user's quota my @IGNORE = ( qr'^/raid/all/Users/[^/]+/Library/Caches', qr'^/raid/all/Users/[^/]+/.Trash' ); # Any directory containing a hidden file with this name will be ignored my $DOT_EXCLUDE = '.rsync_snapshot_exclude'; =head1 METHODS =head2 getUserInfo(C<$homes>, C<$dir>) [returns I<\%userinfo>] Given the base home directory path and the relative user directory, attempts to find the owner of the directory and then queries the directory service for information about the user (such as quota size and e-mail address). If information cannot be obtained, a hash with the special key "ERROR" will be returned, and a reason (if any) specified as the value to that key. If the C variable is specified, this method attempts to find the owner by searching the directory service for the user's home directory key. Otherwise, we assume that the name of the directory exactly matches the name of the user, and we query the directory service with this value. =over 2 =item C (I [B (I [B; close(DSCL) or $attr{ERROR} = "Could not close pipe to dscl: $!"; if (defined($result) && $result =~ /^(\S*)/) { $user = $1; } else { $attr{ERROR} = "Unable to find user with home directory $NFS_PREFIX$homes/$dir: $!"; } } else { $attr{ERROR} = "Unable to open dscl: $!"; } } else { # assume the username is the same as the dir name $user = $dir; } return \%attr if ($attr{ERROR}); if (open(DSCL, '-|', '/usr/bin/dscl', '-q', '/Search', '-read', "/Users/$user", 'FirstName', 'EMailAddress', 'HomeDirectoryQuota')) { my $k = undef; my $v = undef; while (my $line = ) { chomp $line; if ($line =~ /^(\S+): (.+)$/) { $attr{$1} = $2; $k = undef; } elsif ($line =~ /^(\S+):$/) { $k = $1; $v = undef; } elsif (defined($k) && $line =~ /^ (.*)$/) { $v = $attr{$k} . "\n" if (exists($attr{$k})); $v .= $1; $attr{$k} = $v; $v = undef; } elsif ($line =~ /^No such key: (.*)$/) { my $key = $1; if ($key eq 'FirstName') { # print STDERR "No $key key for $user: substituting default\n"; $attr{FirstName} = "Unknown"; } elsif ($key eq 'HomeDirectoryQuota') { # print STDERR "No $key key for $user: substituting default\n"; $attr{HomeDirectoryQuota} = $DEFAULT_QUOTA; } elsif ($key eq 'EMailAddress') { # print STDERR "No $key key for $user: substituting default\n"; $attr{EMailAddress} = $EMAIL_TO_DEFAULT; } } else { $attr{ERROR} = "Could not decode dscl line '$line'"; } } close(DSCL) or $attr{ERROR} = "Could not close pipe to dscl: $!"; } else { $attr{ERROR} = "Could not open pipe to dscl: $!"; } return \%attr; } # end sub getUserInfo =head2 du(I<$path>) [returns I<$totalBytes>] Given a filesystem path, this function returns the total number of bytes occupied by the filesystem path and any enclosed subdirectories. The path should be a fully-qualified filesystem path (beginning with a forward slash). The function traverses the directory starting at root path provided, and follows child directories. Symbolic links and special files are NOT counted or followed. Any paths that match patterns in the @IGNORE array are not counted, nor are they recursively searched. Any directories that contain a file named $DOT_EXCLUDE are also not counted, nor are they recursively searched. Additionally, an rsync exclude pattern for that directory is emitted by the program. The total number of bytes for the files and directories is returned from the function. Byte total is an exact count of all files, which may be lower than the actual amount of disk space used (due to block rounding). =cut sub du($$;$) { my $base = shift(@_); my $path = shift(@_); my $emit = shift(@_); $emit = 0 unless defined($emit); # shortcut so we don't have to write this a dozen times my $root = "$base/$path"; my $total = 0; my @stat = stat($root); if (-f _ || -l $root) { $total = -s _; } elsif (-d _) { $total = -s _; # Ignore if this directory shouldn't be counted for my $ignore (@IGNORE) { if ($root =~ $ignore) { # print "du Ignoring '$path'\n"; return 0; } } # Exclude if this directory is marked to not be backed up if (-f "$root/$DOT_EXCLUDE") { # print "du excluding '$path'\n"; print "- /$path\n" if $emit; return 0; } # recurse into subdirs and count totals # (note local indirect fh to prevent namespace collisions in recursion) opendir(my $subdir, $root) or warn "Can't opendir '$root': $!\n"; for my $entry (readdir($subdir)) { # skip special entries '.' and '..' if ('.' eq $entry || '..' eq $entry) { # just short-circuit if it's a special dir next; } # use non-prototype call to prevent warnings on recursive call $total += &du($base, "$path/$entry", $emit); } closedir($subdir); } return $total; } # end sub du =head2 duDepth(I<$path>, I<$levels>) [returns I<\@sizes>] Given a filesystem path, this function returns the total number of bytes for each file or directory that is I<$level> deep in the filesystem. Files that are deeper than this are counted in the totals of their parents. For example, given the following directory structure: root/ SomeFile.txt Movies/ MyBigMovie.mov Pictures/ Vacation/ One.jpg Two.jpg Assuming the path "/root" is passed to this function: A level of zero would give the total for the entire "root" A level of 1 would give totals for "SomeFile.txt", "Movies" (and all its child files), and "Pictures" (and all it's child files). A level of 2 would give totals for "SomeFile.txt" (even though it's only one level deep, it has no children and so is counted), "MyBigMovie.mov", and "Vacation" For an infinite depth, use -1. Note that this may consume a lot of memory, as a record will be kept for every file in the directory structure! The function returns a 2-dimensional array, sorted by file size (largest is at index zero). The dimensions are the file size, and then the path of the file. =cut sub duDepth($$$;$) { my $base = shift(@_); my $path = shift(@_); my $level = shift(@_); my $sizes = shift(@_) || []; my $root = "$base/$path"; # Ignore if this directory shouldn't be counted for my $ignore (@IGNORE) { if ($root =~ $ignore) { # print "Depth Ignoring '$path'\n"; return $sizes; } } # Exclude if this directory is marked to not be backed up if (-d $root && -f "$root/$DOT_EXCLUDE") { # print "Depth Excluding '$path'\n"; return $sizes; } if ($level == 0 || -l "$root" || ! -d "$root") { # leaf dir, or plain file, so add the accumulated size here my $size = du($base, $path, 0); # perform an insertion at the proper place in the list my $insert = $#{$sizes} + 1; while ($insert > 0) { if ($size > $sizes->[$insert-1][0]) { # my size is bigger than the one at this spot in the list, # so ripple it down $sizes->[$insert] = $sizes->[$insert-1]; } else { # this spot is where I belong; the next spot in the list # has a larger item than me last; } $insert--; } $sizes->[$insert] = [$size, $path]; } else { # set up for the recursive call $level-- unless $level < 0; opendir(my $subdir, "$root") or warn "Can't opendir '$root': $!\n"; for my $entry (readdir($subdir)) { # skip special entries '.' and '..' if ('.' eq $entry || '..' eq $entry) { # just short-circuit if it's a special dir next; } # use non-prototype call to prevent warnings on recursive call &duDepth($base, "$path/$entry", $level, $sizes); } closedir($subdir); } return $sizes; } # end sub duDepth =head2 format_bytes(C<$bytes>, [C<$fmt>]) [returns I] Format the given number of bytes into a more human-comprehensible number. =over 2 =item C<$bytes> (I [B]) Should be a non-negative integer value. Decimal values will be truncated, and other values interpreted as zero. =item C<$fmt> (I [B]) Alternate formatting string to use. Must have two placeholders; the first is for the number, and the second is for the suffix. =back =cut my @BYTE_SUFFIX = qw(B kB MB GB TB PB EB ZB YB); sub format_bytes($;$) { my $b = int(shift(@_) || 0); my $fmt = shift(@_) || '%3.2f %-2s'; my $s = ''; for my $i (0 .. $#BYTE_SUFFIX) { if ($b < 1024) { $s = $BYTE_SUFFIX[$i]; last; } else { $b = $b/1024; } } return sprintf($fmt, $b, $s); } # end sub format_bytes =head2 sendWarning(C<>) [returns I] Sends an e-mail message to the specified user if the C<-w> flag has been set. =over 2 =item C<$to> (I [B]) Address to send the e-mail to =item C<$name> (I [B]) Name of the person to send the e-mail to =item C<$quota> (I [B]) Number of bytes in this user's quota. =item C<$over> (I [B]) Number of bytes over the quota that the user has consumed. =item C<\@sizes> (I [B]) 2D array of paths and their sizes, sorted from largest to smallest, as produced by the C function. =item C<$overidx> (I [B]) The index of the last item in the sizes array that is over quota. This function will report on all items in sizes from 0 .. $overidx (inclusive). =back =cut sub sendWarning($$$$$$;) { my $to = shift(@_); my $name = shift(@_); my $quota = format_bytes(shift(@_)); my $over = format_bytes(shift(@_)); my $sizes = shift(@_); my $overidx = shift(@_); # don't send e-mail unless the -w flag was specified return unless ($WARN > 0); my $message = <[$i][0]), substr($sizes->[$i][1], 0, 65), "\n"); } $message .= <mail($EMAIL_FROM); # $SMTP->to($to); $SMTP->to('jhealy@suffieldacademy.org'); $SMTP->data(); # $SMTP->datasend("To: $name <$to>\n"); $SMTP->datasend("To: Jason Healy \n"); $SMTP->datasend("From: $EMAIL_FROM_NAME <$EMAIL_FROM>\n"); $SMTP->datasend("Subject: Fileserver Usage Warning\n"); $SMTP->datasend("\n"); $SMTP->datasend($message); $SMTP->dataend(); } # end sub sendWarning ############################################################################ ### MAIN ### ############################################################################ if ($#ARGV > -1 && $ARGV[0] eq '-w') { shift(@ARGV); $WARN = 1; } if ($#ARGV < 0) { print STDERR "Usage: users_over_quota [-w] \n\n"; exit 1; } for my $HOMES (@ARGV) { # strip trailing slash(es), if any $HOMES =~ s#/*$##; opendir(HOMES, $HOMES) or die "Could not open homes base dir '$HOMES': $!"; # ignore "hidden" dirs and plain files my @dirs = grep { /^[^.]/ && -d "$HOMES/$_" } readdir(HOMES); # my @dirs = grep { /^jhealy/ } readdir(HOMES); closedir(HOMES) or die "Could not close home dir handle: $!"; for my $dir (@dirs) { my %attr = %{getUserInfo($HOMES, $dir)}; if ($attr{ERROR}) { die "Could not get user info for dir '$dir': $attr{ERROR}\n"; } else { my $over = du($HOMES, $dir, 1) - $attr{HomeDirectoryQuota}; if ($over > 0) { # user is over quota; find largest items my $sizes = duDepth($HOMES, $dir, 2); my $overleft = $over; my $overidx = -1; while ($overleft > 0) { $overidx++; $overleft -= $sizes->[$overidx][0]; } # at this point, everything in $sizes from 0 to $overidx (inclusive) # should be excluded # print "# $dir is $over (" . format_bytes($over) . ") over quota\n"; for my $i (0 .. $overidx) { print "- /" . $sizes->[$i][1] . "\n"; } sendWarning($attr{EMailAddress}, $attr{FirstName}, $attr{HomeDirectoryQuota}, $over, $sizes, $overidx); } else { # print "# $dir is $over (-" . format_bytes(-$over) . ") under quota\n"; } } } } =head1 SEE ALSO LEweb.suffieldacademy.orgEilsEnetadminEsoftwareErsync_snapshotE> =head1 AUTHOR Jason Healy, Suffield Academy =head1 COPYRIGHT AND LICENSE Copyright 2009 by Jason Healy This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut