install/5.005/DBD-Pg-1.22-fixvercmp/t/lib/App/Info/Util.pm

   1 package App::Info::Util;
   2
   3 # $Id: Util.pm,v 1.1 2004-04-29 09:21:28 ivan Exp $
   4
   5 =head1 NAME
   6
   7 App::Info::Util - Utility class for App::Info subclasses
   8
   9 =head1 SYNOPSIS
  10
  11   use App::Info::Util;
  12
  13   my $util = App::Info::Util->new;
  14
  15   # Subclasses File::Spec.
  16   my @paths = $util->paths;
  17
  18   # First directory that exists in a list.
  19   my $dir = $util->first_dir(@paths);
  20
  21   # First directory that exists in a path.
  22   $dir = $util->first_path($ENV{PATH});
  23
  24   # First file that exists in a list.
  25   my $file = $util->first_file('this.txt', '/that.txt', 'C:\\foo.txt');
  26
  27   # First file found among file base names and directories.
  28   my $files = ['this.txt', 'that.txt'];
  29   $file = $util->first_cat_file($files, @paths);
  30
  31 =head1 DESCRIPTION
  32
  33 This class subclasses L<File::Spec|File::Spec> and adds its own methods in
  34 order to offer utility methods to L<App::Info|App::Info> classes. Although
  35 intended to be used by App::Info subclasses, in truth App::Info::Util's
  36 utility may be considered more general, so feel free to use it elsewhere.
  37
  38 The methods added in addition to the usual File::Spec suspects are designed to
  39 facilitate locating files and directories on the file system, as well as
  40 searching those files. The assumption is that, in order to provide useful
  41 metadata about a given software package, an App::Info subclass must find
  42 relevant files and directories and parse them with regular expressions. This
  43 class offers methods that simplify those tasks.
  44
  45 =cut
  46
  47 use strict;
  48 use File::Spec ();
  49 use vars qw(@ISA $VERSION);
  50 @ISA = qw(File::Spec);
  51 $VERSION = '0.22';
  52
  53 my %path_dems = (MacOS   => qr',',
  54                  MSWin32 => qr';',
  55                  os2     => qr';',
  56                  VMS     => undef,
  57                  epoc    => undef);
  58
  59 my $path_dem = exists $path_dems{$^O} ? $path_dems{$^O} : qr':';
  60
  61 =head1 CONSTRUCTOR
  62
  63 =head2 new
  64
  65   my $util = App::Info::Util->new;
  66
  67 This is a very simple constructor that merely returns an App::Info::Util
  68 object. Since, like its File::Spec super class, App::Info::Util manages no
  69 internal data itself, all methods may be used as class methods, if one prefers
  70 to. The constructor here is provided merely as a convenience.
  71
  72 =cut
  73
  74 sub new { bless {}, ref $_[0] || $_[0] }
  75
  76 =head1 OBJECT METHODS
  77
  78 In addition to all of the methods offered by its super class,
  79 L<File::Spec|File::Spec>, App::Info::Util offers the following methods.
  80
  81 =head2 first_dir
  82
  83   my @paths = $util->paths;
  84   my $dir = $util->first_dir(@dirs);
  85
  86 Returns the first file system directory in @paths that exists on the local
  87 file system. Only the first item in @paths that exists as a directory will be
  88 returned; any other paths leading to non-directories will be ignored.
  89
  90 =cut
  91
  92 sub first_dir {
  93     shift;
  94     foreach (@_) { return $_ if -d }
  95     return;
  96 }
  97
  98 =head2 first_path
  99
 100   my $path = $ENV{PATH};
 101   $dir = $util->first_path($path);
 102
 103 Takes the $path string and splits it into a list of directory paths, based on
 104 the path demarcator on the local file system. Then calls C<first_dir()> to
 105 return the first directoy in the path list that exists on the local file
 106 system. The path demarcator is specified for the following file systems:
 107
 108 =over 4
 109
 110 =item MacOS: ","
 111
 112 =item MSWin32: ";"
 113
 114 =item os2: ";"
 115
 116 =item VMS: undef
 117
 118 This method always returns undef on VMS. Patches welcome.
 119
 120 =item epoc: undef
 121
 122 This method always returns undef on epoch. Patches welcome.
 123
 124 =item Unix: ":"
 125
 126 All other operating systems are assumed to be Unix-based.
 127
 128 =back
 129
 130 =cut
 131
 132 sub first_path {
 133     return unless $path_dem;
 134     shift->first_dir(split /$path_dem/, shift)
 135 }
 136
 137 =head2 first_file
 138
 139   my $file = $util->first_file(@filelist);
 140
 141 Examines each of the files in @filelist and returns the first one that exists
 142 on the file system. The file must be a regular file -- directories will be
 143 ignored.
 144
 145 =cut
 146
 147 sub first_file {
 148     shift;
 149     foreach (@_) { return $_ if -f }
 150     return;
 151 }
 152
 153 =head2 first_exe
 154
 155   my $exe = $util->first_exe(@exelist);
 156
 157 Examines each of the files in @exelist and returns the first one that exists
 158 on the file system as an executable file. Directories will be ignored.
 159
 160 =cut
 161
 162 sub first_exe {
 163     shift;
 164     foreach (@_) { return $_ if -f && -x }
 165     return;
 166 }
 167
 168 =head2 first_cat_path
 169
 170   my $file = $util->first_cat_path('ick.txt', @paths);
 171   $file = $util->first_cat_path(['this.txt', 'that.txt'], @paths);
 172
 173 The first argument to this method may be either a file or directory base name
 174 (that is, a file or directory name without a full path specification), or a
 175 reference to an array of file or directory base names. The remaining arguments
 176 constitute a list of directory paths. C<first_cat_path()> processes each of
 177 these directory paths, concatenates (by the method native to the local
 178 operating system) each of the file or directory base names, and returns the
 179 first one that exists on the file system.
 180
 181 For example, let us say that we were looking for a file called either F<httpd>
 182 or F<apache>, and it could be in any of the following paths:
 183 F</usr/local/bin>, F</usr/bin/>, F</bin>. The method call looks like this:
 184
 185   my $httpd = $util->first_cat_path(['httpd', 'apache'], '/usr/local/bin',
 186                                     '/usr/bin/', '/bin');
 187
 188 If the OS is a Unix variant, C<first_cat_path()> will then look for the first
 189 file that exists in this order:
 190
 191 =over 4
 192
 193 =item /usr/local/bin/httpd
 194
 195 =item /usr/local/bin/apache
 196
 197 =item /usr/bin/httpd
 198
 199 =item /usr/bin/apache
 200
 201 =item /bin/httpd
 202
 203 =item /bin/apache
 204
 205 =back
 206
 207 The first of these complete paths to be found will be returned. If none are
 208 found, then undef will be returned.
 209
 210 =cut
 211
 212 sub first_cat_path {
 213     my $self = shift;
 214     my $files = ref $_[0] ? shift() : [shift()];
 215     foreach my $p (@_) {
 216         foreach my $f (@$files) {
 217             my $path = $self->catfile($p, $f);
 218             return $path if -e $path;
 219         }
 220     }
 221     return;
 222 }
 223
 224 =head2 first_cat_dir
 225
 226   my $dir = $util->first_cat_dir('ick.txt', @paths);
 227   $dir = $util->first_cat_dir(['this.txt', 'that.txt'], @paths);
 228
 229 Funtionally identical to C<first_cat_path()>, except that it returns the
 230 directory path in which the first file was found, rather than the full
 231 concatenated path. Thus, in the above example, if the file found was
 232 F</usr/bin/httpd>, while C<first_cat_path()> would return that value,
 233 C<first_cat_dir()> would return F</usr/bin> instead.
 234
 235 =cut
 236
 237 sub first_cat_dir {
 238     my $self = shift;
 239     my $files = ref $_[0] ? shift() : [shift()];
 240     foreach my $p (@_) {
 241         foreach my $f (@$files) {
 242             my $path = $self->catfile($p, $f);
 243             return $p if -e $path;
 244         }
 245     }
 246     return;
 247 }
 248
 249 =head2 first_cat_exe
 250
 251   my $exe = $util->first_cat_exe('ick.txt', @paths);
 252   $exe = $util->first_cat_exe(['this.txt', 'that.txt'], @paths);
 253
 254 Funtionally identical to C<first_cat_path()>, except that it returns the full
 255 path to the first executable file found, rather than simply the first file
 256 found.
 257
 258 =cut
 259
 260 sub first_cat_exe {
 261     my $self = shift;
 262     my $files = ref $_[0] ? shift() : [shift()];
 263     foreach my $p (@_) {
 264         foreach my $f (@$files) {
 265             my $path = $self->catfile($p, $f);
 266             return $path if -f $path && -x $path;
 267         }
 268     }
 269     return;
 270 }
 271
 272 =head2 search_file
 273
 274   my $file = 'foo.txt';
 275   my $regex = qr/(text\s+to\s+find)/;
 276   my $value = $util->search_file($file, $regex);
 277
 278 Opens C<$file> and executes the C<$regex> regular expression against each line
 279 in the file. Once the line matches and one or more values is returned by the
 280 match, the file is closed and the value or values returned.
 281
 282 For example, say F<foo.txt> contains the line "Version 6.5, patch level 8",
 283 and you need to grab each of the three version parts. All three parts can
 284 be grabbed like this:
 285
 286   my $regex = qr/Version\s+(\d+)\.(\d+),[^\d]*(\d+)/;
 287   my @nums = $util->search_file($file, $regex);
 288
 289 Now C<@nums> will contain the values C<(6, 5, 8)>. Note that in a scalar
 290 context, the above search would yeild an array reference:
 291
 292   my $regex = qr/Version\s+(\d+)\.(\d+),[^\d]*(\d+)/;
 293   my $nums = $util->search_file($file, $regex);
 294
 295 So now C<$nums> contains C<[6, 5, 8]>. The same does not hold true if the
 296 match returns only one value, however. Say F<foo.txt> contains the line
 297 "king of the who?", and you wish to know who the king is king of. Either
 298 of the following two calls would get you the data you need:
 299
 300   my $minions = $util->search_file($file, qr/King\s+of\s+(.*)/);
 301   my @minions = $util->search_file($file, qr/King\s+of\s+(.*)/);
 302
 303 In the first case, because the regular expression contains only one set of
 304 parentheses, C<search_file()> will simply return that value: C<$minions>
 305 contains the string "the who?". In the latter case, C<@minions> of course
 306 contains a single element: C<("the who?")>.
 307
 308 Note that a regular expression without parentheses -- that is, one that
 309 doesn't grab values and put them into $1, $2, etc., will never successfully
 310 match a line in this method. You must include something to parentetically
 311 match. If you just want to know the value of what was matched, parenthesize
 312 the whole thing and if the value returns, you have a match. Also, if you need
 313 to match patterns across lines, try using multiple regular expressions with
 314 C<multi_search_file()>, instead.
 315
 316 =cut
 317
 318 sub search_file {
 319     my ($self, $file, $regex) = @_;
 320     return unless $file && $regex;
 321     open F, "<$file" or Carp::croak "Cannot open $file: $!\n";
 322     my @ret;
 323     while (<F>) {
 324         # If we find a match, we're done.
 325         (@ret) = /$regex/ and last;
 326     }
 327     close F;
 328     # If the match returned an more than one value, always return the full
 329     # array. Otherwise, return just the first value in a scalar context.
 330     return unless @ret;
 331     return wantarray ? @ret : $#ret <= 0 ? $ret[0] : \@ret;
 332 }
 333
 334 =head2 multi_search_file
 335
 336   my @regexen = (qr/(one)/, qr/(two)\s+(three)/);
 337   my @matches = $util->multi_search_file($file, @regexen);
 338
 339 Like C<search_file()>, this mehod opens C<$file> and parses it for regular
 340 expresion matches. This method, however, can take a list of regular
 341 expressions to look for, and will return the values found for all of them.
 342 Regular expressions that match and return multiple values will be returned as
 343 array referernces, while those that match and return a single value will
 344 return just that single value.
 345
 346 For example, say you are parsing a file with lines like the following:
 347
 348   #define XML_MAJOR_VERSION 1
 349   #define XML_MINOR_VERSION 95
 350   #define XML_MICRO_VERSION 2
 351
 352 You need to get each of these numbers, but calling C<search_file()> for each
 353 of them would be wasteful, as each call to C<search_file()> opens the file and
 354 parses it. With C<multi_search_file()>, on the other hand, the file will be
 355 opened only once, and, once all of the regular expressions have returned
 356 matches, the file will be closed and the matches returned.
 357
 358 Thus the above values can be collected like this:
 359
 360   my @regexen = ( qr/XML_MAJOR_VERSION\s+(\d+)$/,
 361                   qr/XML_MINOR_VERSION\s+(\d+)$/,
 362                   qr/XML_MICRO_VERSION\s+(\d+)$/ );
 363
 364   my @nums = $file->multi_search_file($file, @regexen);
 365
 366 The result will be that C<@nums> contains C<(1, 95, 2)>. Note that
 367 C<multi_file_search()> tries to do the right thing by only parsing the file
 368 until all of the regular expressions have been matched. Thus, a large file
 369 with the values you need near the top can be parsed very quickly.
 370
 371 As with C<search_file()>, C<multi_search_file()> can take regular expressions
 372 that match multiple values. These will be returned as array references. For
 373 example, say the file you're parsing has files like this:
 374
 375   FooApp Version 4
 376   Subversion 2, Microversion 6
 377
 378 To get all of the version numbers, you can either use three regular
 379 expressions, as in the previous example:
 380
 381   my @regexen = ( qr/FooApp\s+Version\s+(\d+)$/,
 382                   qr/Subversion\s+(\d+),/,
 383                   qr/Microversion\s+(\d$)$/ );
 384
 385   my @nums = $file->multi_search_file($file, @regexen);
 386
 387 In which case C<@nums> will contain C<(4, 2, 6)>. Or, you can use just two
 388 regular expressions:
 389
 390   my @regexen = ( qr/FooApp\s+Version\s+(\d+)$/,
 391                   qr/Subversion\s+(\d+),\s+Microversion\s+(\d$)$/ );
 392
 393   my @nums = $file->multi_search_file($file, @regexen);
 394
 395 In which case C<@nums> will contain C<(4, [2, 6])>. Note that the two
 396 parentheses that return values in the second regular expression cause the
 397 matches to be returned as an array reference.
 398
 399 =cut
 400
 401 sub multi_search_file {
 402     my ($self, $file, @regexen) = @_;
 403     return unless $file && @regexen;
 404     my @each = @regexen;
 405     open F, "<$file" or Carp::croak "Cannot open $file: $!\n";
 406     my %ret;
 407     while (my $line = <F>) {
 408         my @splice;
 409         # Process each of the regular expresssions.
 410         for (my $i = 0; $i < @each; $i++) {
 411             if ((my @ret) = $line =~ /$each[$i]/) {
 412                 # We have a match! If there's one match returned, just grab
 413                 # it. If there's more than one, keep it as an array ref.
 414                 $ret{$each[$i]} = $#ret > 0 ? \@ret : $ret[0];
 415                 # We got values for this regex, so not its place in the @each
 416                 # array.
 417                 push @splice, $i;
 418             }
 419         }
 420         # Remove any regexen that have already found a match.
 421         for (@splice) { splice @each, $_, 1 }
 422         # If there are no more regexes, we're done -- no need to keep
 423         # processing lines in the file!
 424         last unless @each;
 425     }
 426     close F;
 427     return unless %ret;
 428     return wantarray ? @ret{@regexen} : \@ret{@regexen};
 429 }
 430
 431 1;
 432 __END__
 433
 434 =head1 BUGS
 435
 436 Report all bugs via the CPAN Request Tracker at
 437 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=App-Info>.
 438
 439 =head1 AUTHOR
 440
 441 David Wheeler <L<david@wheeler.net|"david@wheeler.net">>
 442
 443 =head1 SEE ALSO
 444
 445 L<App::Info|App::Info>, L<File::Spec|File::Spec>,
 446 L<App::Info::HTTPD::Apache|App::Info::HTTPD::Apache>
 447 L<App::Info::RDBMS::PostgreSQL|App::Info::RDBMS::PostgreSQL>
 448
 449 =head1 COPYRIGHT AND LICENSE
 450
 451 Copyright (c) 2002, David Wheeler. All Rights Reserved.
 452
 453 This module is free software; you can redistribute it and/or modify it under the
 454 same terms as Perl itself.
 455
 456 =cut