package Pitonyak::SafeGlob; #************************************************************ =head1 NAME Pitonyak::SafeGlob - Regular expressions and file specs for finding files and directories. =head1 SYNOPSIS File and directory scanning based on regular expressions. use Pitonyak::SafeGlob my $case_sensitive = 1; my $include_files = 1; my $include_dirs = 0; my $recurse_dirs = 1; my @full_specs = ('c:/*.txt', '/usr/home/andy/*.txt'); my @file_specs = ('*.txt, *.MSG'); my @file_regs = ('.*\.txt, .*\.MSG'); my @files; @files = glob_spec_from_path($case_sensitive, $include_dirs, $include_files, $recurse_dirs, @full_specs); @files = glob_spec($case_sensitive, $include_dirs, $include_files, 'c:/', @file_specs); @files = glob_regex($case_sensitive, $include_dirs, $include_files, 'c:/', @file_regs); =head1 DESCRIPTION There was a time when glob() returned an empty list if there were too many files in a directory. This module avoids this problem. In the following routines, if the C<$use_case> parameter evaluates to true, then matching will be done in a case sensitive manner. Matches are not case sensitive otherwise. In the following routines, if the C<$include_files> parameter evaluates to true, then files that match will be returned. In the following routines, if the C<$include_dirs> parameter evaluates to true, then directories that match will be returned. =cut #************************************************************ use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); require Exporter; $VERSION = '1.01'; @ISA = qw(Exporter); @EXPORT = qw(); @EXPORT_OK = qw( case_sensitive copy new get_class_attribute glob_regex glob_regex_dirs glob_regex_files glob_spec glob_spec_from_path recurse return_dirs return_files ); use strict; use Carp; use File::Basename; use Pitonyak::DeepCopy qw(deep_copy); my %initial_attributes = ( 'recurse' => 0, 'return_dirs' => 1, 'return_files' => 1, 'case_sensitive' => 1, ); #************************************************************ =pod =head2 new =over 4 =item new() Note that this is written in such a manner that it can be inherited. Also note that it is written such that $obj2 = $obj1->new() is valid! =back =cut #************************************************************ sub new { my $self = shift; my $objref = bless {}, ref($self) || $self; $objref->initialize(); if ( ref($self) ) { $objref->copy($self); } return $objref; } #************************************************************ #** ** #** Input : None. ** #** ** #** Output: None. ** #** ** #** Notes : Not really needed ** #** ** #************************************************************ sub DESTROY { } #************************************************************ =pod =head2 case_sensitive =over 4 =item case_sensitive([0|1]) Set the attribute if a parameter is present. Return the state of the parameter. =back =cut #************************************************************ sub case_sensitive { return get_class_attribute( @_, 'case_sensitive' ); } #************************************************************ =pod =head2 copy =over 4 =item copy($object) Make a copy of this object C<$obj1->copy($obj2)> is the same as C<$obj1 = $obj2>. =back =cut #************************************************************ sub copy { $_[0]->close_log(); foreach my $key ( keys %initial_attributes ) { if ( UNIVERSAL::isa( $_[1]->{$key}, 'HASH' ) ) { # If I simply copy it over then we both reference the same thing! $_[0]->{$key} = deep_copy( $_[1]->{$key} ); } else { $_[0]->{$key} = $_[1]->{$key}; } } } #************************************************************ #** ** #** Input : None. ** #** ** #** Output: None. ** #** ** #** Notes : Initialize the data structure. ** #** ** #************************************************************ sub initialize { foreach my $key ( keys %initial_attributes ) { if ( UNIVERSAL::isa( $initial_attributes{$key}, 'HASH' ) ) { # If I simply copy it over then we both reference the same thing! $_[0]->{$key} = deep_copy( $initial_attributes{$key} ); } else { $_[0]->{$key} = $initial_attributes{$key}; } } } #************************************************************ =pod =head2 get_class_attribute Remember that the call C<$obj-Emethod(@parms)> is the same as C. =over 4 =item SafeGlob::get_class_attribute($attribute_name) If there is only one paramter, the first parameter is assumed to be an attribute name and the default attribute value is returned. =item $obj->get_class_attribute($attribute_name) If there are two parameters, then the first parameter is assumed to be a C object and the second parameter is assumed to be an attribute name. The attribute value for the object is returned. =item $obj->get_class_attribute($attribute_name, $attribute_value) If three parameters are given, then the first parameter is the object, the second parameter is used to set a new value for the attribute, and the third parameter is the attribute name, The attribute value is then returned. =back =cut #************************************************************ sub get_class_attribute { return $initial_attributes{ $_[0] } if $#_ == 0; return $_[0]->{ $_[1] } if $#_ == 1; $_[0]->{ $_[2] } = $_[1]; return $_[1]; } #************************************************************ =pod =head2 glob_regex =over 4 =item glob_regex($path, [@regular_expressions]) =back All regular expressions are assumed to follow the path. This returns the files and/or directories that match the regular expression in the given path. The directory tree is recursed if the recursion flag is set. =cut #************************************************************ sub glob_regex { if ( $#_ < 1 ) { carp( 'Incorrect usage for glob_regex_dirs, expected at least two parameters, a path and regular expression' ); } my $obj; if ( UNIVERSAL::isa( $_[0], 'Pitonyak::SafeGlob' ) ) { $obj = shift; if ( $#_ < 1 ) { carp( 'Incorrect usage for glob_regex_dirs, expected at least two parameters, a path and regular expression' ); } } else { $obj = new Pitonyak::SafeGlob(); } my @return_values = (); push ( @return_values, $obj->glob_regex_files(@_) ) if $obj->return_files(); push ( @return_values, $obj->glob_regex_dirs(@_) ) if $obj->return_dirs(); return @return_values; } #************************************************************ =pod =head2 glob_regex_dirs =over 4 =item glob_regex_dirs($path, [@dir_regular_expressions]) =back All regular expressions are assumed to follow the path. This returns the directories that match the regular expression in the given path. Recursion is done if the recurse parameter is set. =cut #************************************************************ sub glob_regex_dirs { if ( $#_ < 1 ) { carp( 'Incorrect usage for glob_regex_dirs, expected at least two parameters, a path and regular expression' ); } my $obj; if ( UNIVERSAL::isa( $_[0], 'Pitonyak::SafeGlob' ) ) { $obj = shift; if ( $#_ < 1 ) { carp( 'Incorrect usage for glob_regex_dirs, expected at least two parameters, a path and regular expression' ); } } else { $obj = new Pitonyak::SafeGlob(); } my @dir_list; my $path = shift; if ( !opendir( IND, $path ) ) { carp "Failed to open $path because $!"; } else { my @orig_dir_list = grep( -d "$path/$_", readdir(IND) ); closedir(IND); # Remove the directories '.' and '..' @orig_dir_list = grep( $_ ne '.' && $_ ne '..', @orig_dir_list ); # Now match the regular expression my $regex = scalar(@_) ? join ( '|', @_ ) : ''; @dir_list = grep { /$regex/i } @orig_dir_list if not $obj->case_sensitive(); @dir_list = grep { /$regex/ } @orig_dir_list if $obj->case_sensitive(); # Should we recurse the directory tree? if ( $obj->recurse() ) { foreach my $a_dir (@orig_dir_list) { push ( @dir_list, map { "$a_dir/$_" } $obj->glob_regex_dirs( "$path/$a_dir", $regex ) ); } } } return @dir_list; } #************************************************************ =pod =head2 glob_regex_files =over 4 =item glob_regex_files($path, [@file_regular_expressions]) =back All regular expressions are assumed to follow the path. This returns the files that match the regular expression in the given path. Recursion is done if the recurse parameter is set. =cut #************************************************************ sub glob_regex_files { if ( $#_ < 1 ) { carp( 'Incorrect usage for glob_regex_files, expected at least two parameters, a path and regular expression' ); } my $obj; if ( UNIVERSAL::isa( $_[0], 'Pitonyak::SafeGlob' ) ) { $obj = shift; if ( $#_ < 1 ) { carp( 'Incorrect usage for glob_regex_files, expected at least two parameters, a path and regular expression' ); } } else { $obj = new Pitonyak::SafeGlob(); } my @file_list; my $path = shift; if ( !opendir( IND, $path ) ) { carp "Failed to open $path because $!"; } else { # match the regular expression my $regex = scalar(@_) ? join ( '|', @_ ) : ''; @file_list = grep { /$regex/i && -f "$path/$_" } readdir(IND) if not $obj->case_sensitive(); @file_list = grep { /$regex/ && -f "$path/$_" } readdir(IND) if $obj->case_sensitive(); closedir(IND); # Should we recurse the directory tree? if ( $obj->recurse() ) { foreach my $a_dir ( $obj->glob_regex_dirs( $path, '.*' ) ) { if ( !opendir( IND, "$path/$a_dir" ) ) { carp "Failed to open $path because $!"; } else { # match the regular expression push ( @file_list, map { "$a_dir/$_" } grep { /$regex/i && -f "$path/$a_dir/$_" } readdir(IND) ) if not $obj->case_sensitive(); push ( @file_list, map { "$a_dir/$_" } grep { /$regex/ && -f "$path/$a_dir/$_" } readdir(IND) ) if $obj->case_sensitive(); closedir(IND); } } } } return @file_list; } #************************************************************ =pod =head2 glob_spec =over 4 =item glob_spec($path, [@regular_expressions]) =back All file specs are assumed to follow the path. The file specs are turned into regular expressions and then glob_regex is called. This returns the files that match the file specification in the given path. =cut #************************************************************ sub glob_spec { if ( $#_ < 1 ) { carp( 'Incorrect usage for glob_spec, expected at least two parameters, a path and file spec' ); } my $obj; if ( UNIVERSAL::isa( $_[0], 'Pitonyak::SafeGlob' ) ) { $obj = shift; if ( $#_ < 1 ) { carp( 'Incorrect usage for glob_spec, expected at least two parameters, a path and file spec' ); } } else { $obj = new Pitonyak::SafeGlob(); } my $path = shift; my @reg_list = @_; foreach (@reg_list) { s/\./\\./go; # Convert '.' to '\.' s/\^/\\^/go; # Convert '^' to '\^' s/\$/\\\$/go; # Convert '$' to '\$' s/\?/./go; # Convert '?' to '.' s/\*/.*/go; # Convert '*' to '.*' $_ = "^$_\$"; # Place a '^' in front and '$' at the end. } return $obj->glob_regex( $path, @reg_list ); } #************************************************************ =pod =head2 glob_spec_from_path =over 4 =item glob_spec_from_path([@file_specs_with_dirs]) =back This assumes that it is given a list of file specs where the file specs contain leading directory entries. The file spec and path are separated using File::Basename::fileparse() and then glob_spec is called. =cut #************************************************************ sub glob_spec_from_path { if ( $#_ < 0 ) { carp( 'Incorrect usage for glob_spec_from_path, expected at least one parameter' ); } my $obj; if ( UNIVERSAL::isa( $_[0], 'Pitonyak::SafeGlob' ) ) { $obj = shift; } else { $obj = new Pitonyak::SafeGlob(); } my @list; my @suffixlist = (); foreach my $fullname (@_) { my ( $name, $path, $suffix ) = fileparse( $fullname, @suffixlist ); push ( @list, map { "$path$_" } $obj->glob_spec( $path, $name ) ); } return @list; } #************************************************************ =pod =head2 recurse =over 4 =item recurse([0|1]) Set the attribute if a parameter is present. Return the state of the parameter. =back =cut #************************************************************ sub recurse { return get_class_attribute( @_, 'recurse' ); } #************************************************************ =pod =head2 return_dirs =over 4 =item return_dirs([0|1]) Set the attribute if a parameter is present. Return the state of the parameter. =back =cut #************************************************************ sub return_dirs { return get_class_attribute( @_, 'return_dirs' ); } #************************************************************ =pod =head2 return_files =over 4 =item return_files([0|1]) Set the attribute if a parameter is present. Return the state of the parameter. =back =cut #************************************************************ sub return_files { return get_class_attribute( @_, 'return_files' ); } #************************************************************ =pod =head1 COPYRIGHT Copyright 1998-2002, Andrew Pitonyak (perlboy@pitonyak.org) This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 Modification History =head2 March 13, 1998 Version 1.00 First release =head2 September 10, 2002 Version 1.01 Changed internal documentation to POD documentation and support subdirectories =cut #************************************************************ 1;