#!/usr/bin/perl
# $Id: dmlocate,v 1.10 2000/04/12 23:05:49 alban Exp $

# dmlocate
#
# Searches (ascii) databases representing contents of local dmf
# filesystems.  Prints to stdout lines in databases matching given
# regular expressions.  The databases are created by the dmupdatedb
# and gendmlocdb scripts.
#
# Each line in the database consists of four whitespace-separated
# fields:
#
#   DMF_STATE  bfid  inode  path
#
# where DMF_STATE is always all caps and bfid is always all lowercase.
#
# The basic idea behind dmlocatedb and dmlocate is similar to that
# behind GNU's updatedb and locate except that dmlocate expects
# regular expressions, not file globbing patterns.
#
# See usage() function below for an overview of functionality.

use strict;

# Directory searched for databases when DMLOCATEDBDIR is not set.
my $default_dmlocatedb_dir = "/dmf/etc/dmlocatedb";

# The four database fields, in the order they appear on a line.
my @fields = qw( state bfid inode path );

# State and bfid are matched case-insensitively no matter what the
# user asks for (the database always stores states in uppercase and
# bfid hex digits in lowercase).
my %always_fold_case = ( state => 1, bfid => 1 );

# Building blocks for the generated patterns.
#   $anything_or_nothing     zero or more non-whitespace characters;
#                            pads unanchored ends of non-path fields
#   $anything_but_something  one or more non-whitespace characters;
#                            stands in for a non-path field the user
#                            did not constrain (it cannot be empty)
#   $dot_star                pads/stands in for the path field, which
#                            may contain whitespace
my $anything_or_nothing    = '\\S*';
my $anything_but_something = '\\S+';
my $dot_star               = '.*';

# get program name (as invoked)
( my $pgm = $0 ) =~ s{^.*/}{}s;

# if the user has DMLOCATEDBDIR defined, look for dmlocate databases
# there, otherwise use the default
my $dmlocatedb_dir = $ENV{ DMLOCATEDBDIR } || $default_dmlocatedb_dir;

# Help and "no arguments" are handled before the directory checks so
# that --help works even on a host without any databases.
if ( ! @ARGV ) {
  usage();
  exit 1;
} # if

if ( grep { /^-h$|^-\?$|^--help$/ } @ARGV ) {
  usage();
  exit 0;
} # if

if ( ! -e $dmlocatedb_dir ) {
  die "$pgm: $dmlocatedb_dir: doesn't exist\n";
} # if

if ( ! -r $dmlocatedb_dir ) {
  die "$pgm: $dmlocatedb_dir: cannot read\n";
} # if

# Whether or not we actually perform the search depends on the value of
# this boolean variable.  If we get -n,--dont-execute, this will be
# set to 0.  (-n is most useful when used with -d so that debug output
# is shown without actually searching the databases.)
my $do_the_search = 1;

my ( $debug, $ignore_case, $negative_pattern_search );
my ( $got_field_args, $got_freestanding_pattern_arg );

# The freestanding Pattern argument; later overwritten with the single
# whole-line pattern when no complement match is requested.
my $pattern;

# User-supplied regexp for each field option that was given.
my %regexp_for;

# While processing the command line arguments, each field option (or
# its complement) records a boolean in %negative_field_search, keyed
# by field name (state, bfid, inode, path).  +b (or --bfid-not) sets
# $negative_field_search{ bfid } to 1; a subsequent -b (or --bfid)
# resets it to 0.  So once the arguments are processed we know, for
# each field given, whether to do a "positive" or a "negative" search,
# i.e., "print if //" or "print if ! //".
my %negative_field_search;

# Map every spelling of a field option to the field it sets, whether
# it is the complement form, and the message used when its regexp
# argument is missing.
my %field_option;
for my $spec (
  [ 'bfid',  'b', 'expected regexp for bfid match' ],
  [ 'inode', 'I', 'expected regexp to match inode' ],
  [ 'path',  'p', 'expected regexp to match path'  ],
  [ 'state', 's', 'expected regexp to match state' ],
) {
  my ( $field, $letter, $missing ) = @$spec;

  my $positive = { field => $field, negate => 0, missing => $missing };
  my $negative = {
    field   => $field,
    negate  => 1,
    missing => "expected regexp for $field negative match",
  };

  $field_option{ "-$letter" } = $field_option{ "--$field" }       = $positive;
  $field_option{ "+$letter" } = $field_option{ "--${field}-not" } = $negative;
} # for

# (-h/-?/--help never reaches this loop: it was handled above.)
while ( defined( my $arg = shift @ARGV )) {
  if ( my $opt = $field_option{ $arg } ) {
    my $regexp = shift @ARGV;
    if ( ! defined $regexp ) {
      die "$pgm: $opt->{ missing }\n";
    } # if
    # "last one in" wins when a field option is repeated
    $regexp_for{ $opt->{ field } }            = $regexp;
    $negative_field_search{ $opt->{ field } } = $opt->{ negate };
    $got_field_args = 1;
  } # if
  elsif ( $arg =~ /^-d$|^--debug$/ ) {
    $debug = 1;
  } # elsif
  elsif ( $arg =~ /^-i$|^--ignore-case$/ ) {
    $ignore_case = 1;
  } # elsif
  elsif ( $arg =~ /^-n$|^--dont-execute$/ ) {
    $do_the_search = 0;
  } # elsif
  elsif ( $arg =~ /^-v$|^--dont-match$/ ) {
    $negative_pattern_search = 1;
  } # elsif
  elsif ( $got_field_args ) {
    warn "$pgm: $arg: unknown option\n";
    usage();
    exit 1;
  } # elsif
  elsif ( ! $got_freestanding_pattern_arg ) {
    $pattern = $arg;
    $got_freestanding_pattern_arg = 1;
  } # elsif
  else {
    # a second freestanding pattern
    usage();
    exit 1;
  } # else
} # while

if ( $got_field_args
     && grep { defined $regexp_for{ $_ } && $regexp_for{ $_ } =~ /^\s*$/ }
             @fields ) {
  # we will define the results of a search for a null component to be
  # empty; no error, though
  exit 0;
} # if

# see usage()
if ( $got_freestanding_pattern_arg && $got_field_args ) {
  warn "$pgm: incompatible options\n";
  usage();
  exit 1;
} # if

# see usage()
if ( ! ( $got_freestanding_pattern_arg || $got_field_args )) {
  warn "$pgm: not all required options present\n";
  usage();
  exit 1;
} # if

# debug output if debug option selected
if ( $debug ) {
  my %value_of = (
    bfid                    => $regexp_for{ bfid },
    debug                   => $debug,
    do_the_search           => $do_the_search,
    ignore_case             => $ignore_case,
    inode                   => $regexp_for{ inode },
    negative_pattern_search => $negative_pattern_search,
    path                    => $regexp_for{ path },
    pattern                 => $pattern,
    state                   => $regexp_for{ state },
  );
  for my $name ( qw( bfid debug do_the_search ignore_case inode
                     negative_pattern_search path pattern state )) {
    # unset values are shown as an empty <>
    my $value = defined $value_of{ $name } ? $value_of{ $name } : "";
    print "dbg: $name <$value>\n";
  } # for

  if ( %negative_field_search ) {
    for my $key ( keys %negative_field_search ) {
      print "\$negative_field_search\{ $key \}: <";
      printf "%d", $negative_field_search{ $key };
      print ">\n";
    } # for
  } # if
  else {
    print "dbg: \%negative_field_search \n";
  } # else
} # if

# O.K.  Time for a discussion on regexp's the user will supply and
# how they'll get translated into a pattern this script uses to match
# database entries.  The intent is for this script to act like grep.
# The following table illustrates:
#
#   The user indicates this regexp    The script translates the user's
#   be matched (or not matched):      regexp into something like this:
#
#       string                            ^.*string.*$
#       ^string                           ^string.*$
#       string$                           ^.*string$
#       ^string$                          ^string$
#
# (Actually, for the path field we use dot-star, but for the other
# fields we pad with "\S*", because they can't contain whitespace.)
#
# That is, where the user doesn't anchor the pattern we insert
# padding and an appropriate anchor.  This is necessary because of,
# and different than GNU's locate because of, our database layout.
#
# Each of our databases has at least four whitespace delineated
# fields: state, bfid, inode, and path.  I say *at least* four
# because it's possible that the path field has whitespace in it.
# Not a good idea, but permissible.  It is invalid for state, bfid,
# or inode to have whitespace in them, so no matter how many more
# whitespace delineated fields a database entry has than four, we
# consider the first field to be state, the second to be bfid, the
# third to be inode, and everything that is left to be the path.
#
# So we have choices.  We can convert the regexp(s) the user supplies
# into one regexp that applies to a whole line in the database, or
# into four separate regexp's that are applied individually to each
# field after the line has been taken apart.  Because of the extra
# processing needed to split the fields and do multiple matches, a
# single whole-line regexp is used where possible.  It's much more
# efficient, and hey, we're working with databases with a significant
# aggregate size!
#
# But a single regexp is not built when the user asks for the
# complement of a field regexp.  In that case each line is taken
# apart with
#
#   /^(\S+)\s+(\S+)\s+(\S+)\s+(.*)/
#
# and every field is tested on its own, with the result inverted for
# the fields whose complement was requested.  The per-field regexps
# are compiled once, before any database is read.

# True when the fields have to be tested one by one.
my $field_by_field = 0;

# Per-field pattern strings (only used when field options were given).
my %pattern_for;

if ( $got_freestanding_pattern_arg ) {
  if ( $pattern =~ /^\s*$/ ) {
    # we will define the results of a search for a null component to be
    # empty; no error, though
    exit 0;
  } # if

  # A freestanding Pattern applies to the path field only; the three
  # leading fields are skipped over.
  $pattern = "^\\S+\\s\\S+\\s\\S+\\s"
           . expand_to_whole_field( $pattern, $dot_star )
           . "\$";
} # if
else {
  # field options (the argument checks above guarantee exactly one of
  # the two modes is active)
  for my $field ( @fields ) {
    my $is_path = ( $field eq 'path' );

    if ( ! defined $regexp_for{ $field } ) {
      # unconstrained field: anything (but non-path fields can't be empty)
      $pattern_for{ $field } = $is_path ? $dot_star : $anything_but_something;
      next;
    } # if

    $pattern_for{ $field } = expand_to_whole_field(
      $regexp_for{ $field },
      $is_path ? $dot_star : $anything_or_nothing,
      $always_fold_case{ $field },
    );
  } # for

  if ( at_least_one_compliment_match_was_indicated() ) {
    $field_by_field = 1;
    for my $field ( @fields ) {
      $pattern_for{ $field } = "^$pattern_for{ $field }\$";
      # collapse all occurrences of two or more .*'s to a single .*
      $pattern_for{ $field } =~ s/(?:\.\*){2,}/.*/g;
    } # for
  } # if
  else {
    $pattern = "^" . join( "\\s", @pattern_for{ @fields } ) . "\$";
  } # else
} # else

if ( defined $pattern ) {
  # collapse all occurrences of two or more .*'s to a single .*
  $pattern =~ s/(?:\.\*){2,}/.*/g;

  # collapse all occurrences of two or more \S+'s to a single \S+
  $pattern =~ s/(?:\\S\+){2,}/\\S+/g;
} # if

if ( $debug ) {
  print "dbg: at_least_one_compliment_match_was_indicated() <";
  if ( at_least_one_compliment_match_was_indicated() ) {
    print "1>\n";
    for my $field ( @fields ) {
      print "dbg: ${field}_pattern <$pattern_for{ $field }>\n";
    } # for
  } # if
  else {
    print "0>\n";
    print "dbg: pattern <$pattern>\n";
  } # else
} # if

# Compile the regexps once, in the parent, so that an invalid regexp
# is reported a single time rather than by every search process.
# Nothing is compiled under -n, which keeps -n -d usable for looking
# at the generated patterns.
my ( $line_re, %field_re );
if ( $do_the_search ) {
  my $compiled = eval {
    if ( $field_by_field ) {
      # state, bfid and inode are always matched case-insensitively;
      # path only with --ignore-case
      for my $field ( qw( state bfid inode )) {
        $field_re{ $field } = qr/$pattern_for{ $field }/i;
      } # for
      $field_re{ path } = $ignore_case ? qr/$pattern_for{ path }/i
                                       : qr/$pattern_for{ path }/;
    } # if
    else {
      $line_re = $ignore_case ? qr/$pattern/is : qr/$pattern/s;
    } # else
    1;
  };
  if ( ! $compiled ) {
    die "$pgm: invalid regular expression: $@";
  } # if
} # if

my @databases;
opendir my $db_dir, $dmlocatedb_dir
  or die "$pgm: $dmlocatedb_dir: can't open: $!\n";
while ( defined( my $file = readdir $db_dir )) {
  if ( $file =~ /^dmlocatedb::.+/ ) {
    push @databases, $file;
  } # if
} # while
closedir $db_dir
  or warn "$pgm: $dmlocatedb_dir: can't close: $!\n";

if ( ! @databases ) {
  die "$pgm: cannot find any dmlocate databases in $dmlocatedb_dir\n";
} # if

# for each database, fork a search process
my $children    = 0;
my $fork_failed = 0;
for my $db ( @databases ) {
  my $pid = fork;

  if ( ! defined $pid ) {
    # fork returns undef on failure; stop starting searches but still
    # collect the ones already running
    warn "$pgm: cannot fork to search $db: $!\n";
    $fork_failed = 1;
    last;
  } # if

  if ( $pid ) {
    # parent
    $children++;
    next;
  } # if

  # child: search this one database, then finish
  if ( $do_the_search ) {
    search_database( "$dmlocatedb_dir/$db" );
  } # if
  exit 0;
} # for

wait for 1 .. $children;

exit 1 if $fork_failed;

#-----------------------------------------------------------------------

# expand_to_whole_field( $regexp, $filler, $fold_case )
#
# Turns a user-supplied regexp into a pattern fragment meant to cover
# an entire field.  A leading "^" and/or trailing "$" is stripped;
# each end that was not anchored that way is padded with $filler.
# When $fold_case is true the user's regexp is wrapped in (?i:...) so
# that it alone is matched case-insensitively.  (The regexp text
# itself is never upper/lowercased: that would turn escapes such as
# \d into \D.)  Returns the resulting pattern string.
sub expand_to_whole_field {
  my ( $regexp, $filler, $fold_case ) = @_;

  my $lead  = ( $regexp =~ s/^\^// ) ? "" : $filler;
  my $trail = ( $regexp =~ s/\$$// ) ? "" : $filler;

  if ( $fold_case ) {
    $regexp = "(?i:$regexp)";
  } # if

  return "${lead}($regexp)${trail}";
} # expand_to_whole_field

#-----------------------------------------------------------------------

# search_database( $dbpath )
#
# Reads the database file $dbpath and prints the matching lines to
# stdout, using the regexps and flags prepared by the main program.
# Dies if the file cannot be opened; warns if it cannot be closed.
sub search_database {
  my ( $dbpath ) = @_;

  # three-argument open: the name comes from a directory listing and
  # must never be interpreted as a mode or a pipe
  open my $db_fh, '<', $dbpath
    or die "$pgm: $dbpath: can't open: $!\n";

  if ( $field_by_field ) {
    search_field_by_field( $db_fh, $dbpath );
  } # if
  else {
    # -v only applies to a freestanding Pattern
    my $negate = $got_freestanding_pattern_arg && $negative_pattern_search;

    while ( my $line = <$db_fh> ) {
      my $hit = ( $line =~ $line_re );
      print $line if ( $negate ? ! $hit : $hit );
    } # while
  } # else

  close $db_fh
    or warn "$pgm: $dbpath: can't close: $!\n";
} # search_database

#-----------------------------------------------------------------------

# search_field_by_field( $db_fh, $dbpath )
#
# Used when at least one complement match was requested.  Takes each
# line read from $db_fh apart into its four fields and prints the line
# when every field passes its test: a match for a positive field
# option, a non-match for a complement field option.  Lines that
# cannot be taken apart are reported (naming $dbpath) and skipped.
sub search_field_by_field {
  my ( $db_fh, $dbpath ) = @_;

  # Textual form of the test being applied, shown in debug output.
  my %op_for  = map { $_ => ( $negative_field_search{ $_ } ? "!~" : "=~" ) }
                    @fields;
  my $case_op = $ignore_case ? "i" : "";
  my $cmd = "if ( \$state_field $op_for{ state } /\$state_pattern/i"
          . " && \$bfid_field $op_for{ bfid } /\$bfid_pattern/i"
          . " && \$inode_field $op_for{ inode } /\$inode_pattern/i"
          . " && \$path_field $op_for{ path } /\$path_pattern/$case_op )"
          . "{"
          . " print"
          . "}";

  LINE:
  while ( my $line = <$db_fh> ) {
    my %value_of;
    if ( $line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(.*)/ ) {
      @value_of{ @fields } = ( $1, $2, $3, $4 );
    } # if
    else {
      warn "$pgm: $dbpath: unparsable line: \"$line\"\n";
      # there are no fields to test, so don't test stale ones
      next LINE;
    } # else

    if ( $debug ) {
      print "$cmd\n";
      for my $field ( @fields ) {
        print "${field}_field <$value_of{ $field }>\n";
        print "${field}_pattern <$pattern_for{ $field }>\n";
      } # for
    } # if

    for my $field ( @fields ) {
      my $hit = ( $value_of{ $field } =~ $field_re{ $field } );
      my $ok  = $negative_field_search{ $field } ? ! $hit : $hit;
      next LINE if ! $ok;
    } # for

    print $line;
  } # while
} # search_field_by_field

#-----------------------------------------------------------------------

# usage()
#
# Prints the usage summary to stderr.
sub usage {
  print STDERR <<"EndOfUsage";
usage: $pgm options
       $pgm [ non_field_options ] Pattern
       $pgm --help

field options:

  -b,--bfid BFID       Print database entries to stdout whose bfid
                       fields match regexp BFID

  -I,--inode Inode     Print database entries to stdout whose inode
                       fields match regexp Inode.  (Note the
                       difference between this option and the
                       lowercase "-i" option.)

  -p,--path Path       Print database entries to stdout whose path
                       fields match rexexp Path

  -s,--state State     Print database entries to stdout whose state
                       fields match rexexp State

other options:

  -d,--debug           Show debug output

  -h,--help            List usage

  -i,--ignore-case     Ignore case when matching path fields.  (Note
                       the difference between this option and the
                       uppercase "-I" option.)

  -n,--dont-execute    Do not execute dmlocate database searches
                       (useful for debugging)

  -v,--dont-match      Print entries whose path fields do not match
                       Pattern.  Incompatible with field options.

When no field options are given, but a Pattern is, $pgm will print
database entries whose path fields match regexp Pattern.

Field options and a freestanding Pattern may not be specified on a
command line.  But either a subset of field options or a Pattern must
be present.

Matching state and bfid fields is always case-insensitive.  Matching
path fields is case sensitive unless --ignore-case is specified.

To specify matching the compliment of a field option, simply change
the dash to a plus if the single dash invocation of the field option
is used.  If the double dash invocation is used, append "-not" to the
option name.  Examples: "-s dul" becomes "+s dul"; "--bfid 0" becomes
"--bfid-not 0"

Caveat: matching by specifiying compliments is more time consuming
than matching when no compliments are specified.

If multiple instances of field options are given, the "last one in"
wins.  That is, if "--bfid 0 --bfid 3496 --bfid 711B" is given,
neither "--bfid 0" nor "--bfid 3496" will be used for matching.

Perl regular expressions are used.  Refer to the perlre(1) man page.

The default dmlocate database directory is $default_dmlocatedb_dir.
To use an alternative directory, set the environment variable
DMLOCATEDBDIR accordingly.
EndOfUsage
} # usage

#-----------------------------------------------------------------------

# Note the difference between:
#
#   1. if ( %negative_field_search )
#
# and:
#
#   2. if ( at_least_one_compliment_match_was_indicated() )
#
# #1 is true if the hash is not empty.  #2, i.e., an invocation of
# this function, is true if any of the values for any of the keys of
# this hash are true.
#
# So, if the hash has four keys whose values are all zero, then #1
# would be true, but #2 would be false.
#
sub at_least_one_compliment_match_was_indicated {
  return grep { /^1$/ } values %negative_field_search;
} # at_least_one_compliment_match_was_indicated