#!/usr/bin/perl # $Id: duchk,v 1.12 2004/01/14 22:01:35 alban Exp $ # Copyright (C) 2003-2004 David Alban # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA use warnings; use strict; use Getopt::Long; use English; $ENV{ PATH } = "/bin:/usr/bin"; ### PORTABILITY: defined these option letters correctly for du(1) on the ### local system my $summary = "s"; my $size_in_kbytes = "k"; my $dont_cross_filesystems = "x"; my $duopts = "${summary}${size_in_kbytes}${dont_cross_filesystems}"; # program name ( my $pgm = $0 ) =~ s=^.*/==s; my $tabsize = 8; my $indentation = -1 * $tabsize; my ( $opt_help, $opt_recursive, $opt_size ); Getopt::Long::Configure( "bundling" ); if ( not GetOptions ( "h|help" => \$opt_help, "r|recursive" => \$opt_recursive, "s|size=i" => \$opt_size, ) ) { $opt_help and usage() and exit 0; warn "Execute \"$pgm --help\" for usage\n"; exit 1; } # if $opt_help and usage() and exit 0; # get value for $minsize, the minimum size a file must be to be included # in output. It will be (in this order): # 1. $opt_size, if a valid argument to --size was given # 2. The $DUCHK_SIZE environment variable, if it is defined and # contains only digits # 3. The number 1 # It is a fatal error for the --size argument to contain non-digits. It # is an ignorable error for the $DUCHK_SIZE env variable to contain # non-digits. my $minsize = 1; if ( defined $opt_size ) { if ( $opt_size =~ /\D/ ) { die "$pgm: $opt_size: size must consist entirely of digits\n", "Type $pgm --help for details\n"; } # else { $minsize = $opt_size; } # if } # if else { my $sizevar = $ENV{ DUCHK_SIZE }; if ( defined $sizevar ) { if ( $sizevar !~ /\D/ ) { $minsize = $sizevar; } # if else { warn "$pgm: $sizevar: \$DUCHK_SIZE environment variable contains", " non-digits; ignoring\n"; } # if } # if } # if # the directory for (initial) processing not defined( my $startdir = shift ) and usage() and exit 0; warn "$pgm: smallest size displayed will be $minsize kbyte(s)\n"; # get info on active mount points my $mtab = mount_table(); # do the main processing process_directory( $startdir ); sub usage { print STDERR <<"EndOfUsage"; duchk - list disk usage in directory or directory tree usage: $pgm [ options ] Directory options: -h,--help List usage -r,--recursive Process entire directory tree with Directory as the root node. Default is to process Directory only, and not subdirectories therein. -s,--size Size Print information only on files with size >= Size kbytes NOTE: The default behavior is to print information on all files with non-zero size. Use the --size option if you want to limit output based on file size. Filesystems boundaries are not crossed. Directories are displayed with trailing slashes. EndOfUsage } # usage sub process_directory { my $d = shift; not -d $d and fdie( "$startdir: not a directory or doesn't exist\n" ); # the stdout of each invocation of this subroutine will be indented # a(n) (additional) level indent(); # will store sizes of files in the currently processed directory my $sizes = {}; # i originally had the program die here, but decided that in recursive # mode, i'd like it to continue, but just not to have processed this # particular directory if ( not opendir DIR, $d ) { fwarn( "$d: can't open directory: $!\n" ); return; } # if while ( defined( my $entry = readdir DIR )) { # don't process "." and ".." next if $entry =~ /^\.(\.)?$/; # we need the full path of the entry for some operations ( my $fullpath = "$d/$entry" ) =~ s=//*=/=g; # don't process if it's an active mountpoint if ( is_mountpoint( "$fullpath" )) { warn "$pgm: $fullpath: active mountpoint\n"; next; } # if # store size of entry $$sizes{ $entry } = du( $fullpath ); } # while close DIR; # for each $file in the currently processed directory, and in the # order of descending size... for my $file ( sort { $$sizes{ $b } <=> $$sizes{ $a } } keys %$sizes ) { # don't process if we were unable to get size information for the file next if not defined $$sizes{ $file }; # ignore if file is smaller than minimum size to be displayed next if $$sizes{ $file } < $minsize; # get the full path of the file and collapse all multiple adjacent # occurrences of slashes to single slashes ( my $fullpath = "$d/$file" ) =~ s=//*=/=g; # if file is a directory, we'll display it with a trailing slash my $slash = -d $fullpath ? "/" : ""; # for output purposes, make the following string replacements: # each backslash becomes two backslashes # each space becomes "\ " # each tab becomes "\t" # each newline becomes "\n" my $SOH = sprintf "%c", 1; ( my $printable_name = $fullpath ) =~ s/(\\)/$SOH/g; $printable_name =~ s/( )/\\ /g; $printable_name =~ s/(\t)/\\t/g; $printable_name =~ s/(\n)/\\n/g; $printable_name =~ s/($SOH)/\\\\/g; # for output purposes, print the octal code of any non-printable # character if ( $printable_name =~ /[^\x09\x20-\x7e\n]/ ) { $printable_name = demystified( $printable_name ) } # if # finally, print size and name of file, using current level of # indentation printf "%s%10d %s\n", ' ' x $indentation, $$sizes{ $file }, "$printable_name$slash"; # if file is a directory, and we got the recursive option, # recursively call this subroutine with the full path of the file -d $fullpath and $opt_recursive and process_directory( $fullpath ); } # for outdent(); } # process_directory # this routine will get the size in bytes of a file sub du { my $f = shift; my $pid; my @du_results; # The following code was adapted from the perlsec man page. I needed # to du(1) from within perl and collect the output, but without a # shell to interpret metacharacters. If a shell interpreted # metacharacters, then this program wouldn't be able to handle # filenames containing metacharacters. I banged my head on this for # quite a while before the perlfunc man page led me to the example on # the perlsec man page. Whew!!! #################################################### ##### begin code adapted from perlsec man page ##### #################################################### die "Can't fork: $!" unless defined( $pid = open( KID, "-|" )); if ( $pid ) { # parent while ( ) { chomp; push @du_results, $_; } close KID; } # if else { my @temp = ( $EUID, $EGID ); my $orig_uid = $UID; my $orig_gid = $GID; $EUID = $UID; $EGID = $GID; # Drop privileges $UID = $orig_uid; $GID = $orig_gid; # Make sure privs are really gone ($EUID, $EGID) = @temp; die "Can't drop privileges" unless $UID == $EUID && $GID eq $EGID; # Minimal PATH. $ENV{ PATH } = "/bin:/usr/bin"; # Consider sanitizing the environment even more. exec "du", "-$duopts", $f or fdie( "can't exec du: $!\n" ); } # if #################################################### ##### end code adapted from perlsec man page ##### #################################################### # check to make sure array contains at least one line. if not, warn # and return an undefined value if ( @du_results < 1 ) { fwarn( ": no stdout from command\n" ); return undef; } # if chomp( my $results = $du_results[ 0 ] ); # parse the single line of du output for the size or return undef if ( $results !~ /^\s*(\d+)\s+\S+.*$/s ) { fwarn( "can't parse du output: $results\n" ); return undef; } # if # return the size $1; } # du # get info on active mount points and put it in a hash sub mount_table { my $mtab = {}; open MNT, "mount|" or fdie( "mount|: can't open cmd: $!\n" ); while ( defined( my $line = )) { chomp $line; # parse a line of "mount" output, capture the mount point if ( $line !~ /^\S+\s+on\s+(\S+)/ ) { fdie( "couldn't parse line of mount output: \"$line\"\n" ); } # if my $directory = $1; # store the mountpoint in a hash whose keys are strings of the form # created by the file_id() subroutine (which see) my $file_id; if ( not defined( $file_id = file_id( $directory ))) { fdie( "$directory: could not devine fsdev and inode for", " candidate mount point\n" ); } # if $$mtab{ $file_id } = $directory; } # while close MNT; # return the hash reference $mtab; } # mount_table # for the input path, return a string of the form "fsdev,inode" where # fsdev is the file system device number and inode is the inode number # returned by an lstat() call sub file_id { my $f = shift; my @s; if ( not ( @s = lstat $f )) { fwarn( "$f: cannot stat\n" ); return undef; } # if "$s[ 0 ],$s[ 1 ]"; } # file_id # return boolean indicating whether the input path is an active mount point sub is_mountpoint { my $f = shift; # get "file id" for input path or return false not defined( my $fid = file_id( $f )) and return 0; # return whether this "file id" exists in the active mount point table exists $$mtab{ $fid } ? $$mtab{ $fid } : ""; } # is_mountpoint # die, but prepend to the message the program name and the function # from which we were called sub fdie { my $f = ( caller 1 )[ 3 ]; die "$pgm: $f() ", @_; } # fdie # warn, but prepend to the message the program name and the function # from which we were called sub fwarn { my $f = ( caller 1 )[ 3 ]; warn "$pgm: $f() @_"; } # fwarn # increase indentation by one "level" sub indent { $indentation += $tabsize; } # indent # decrease indentation by one "level" sub outdent { $indentation -= $tabsize; $indentation < -1 * $tabsize and fdie( "resulting indentation is less than zero: $indentation\n" ); } # outdent sub demystified { defined( my $string = shift ) or fdie( "null \$string\n" ); my $new_string = ""; for my $pos ( 0..length( $string )) { my $char = substr $string, $pos, 1; if ( $char =~ /[^\x09\x20-\x7e\n]/ ) { my $ord_string = sprintf "%3o", ord $char; $ord_string =~ s/ /0/g; $new_string .= "\\" . $ord_string; } # if else { $new_string .= $char; } # if } # for return $new_string; } # demystified