#!/usr/bin/perl ############################################################################## # Tool for using regular expressions against the contents of files in a tar # archive. See 'ptargrep --help' for more documentation. # BEGIN { pop @INC if $INC[-1] eq '.' } use strict; use warnings; use Pod::Usage qw(pod2usage); use Getopt::Long qw(GetOptions); use Archive::Tar qw(); use File::Path qw(mkpath); my(%opt, $pattern); if(!GetOptions(\%opt, 'basename|b', 'ignore-case|i', 'list-only|l', 'verbose|v', 'help|?', )) { pod2usage(-exitval => 1, -verbose => 0); } pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; pod2usage(-exitval => 1, -verbose => 0, -message => "No pattern specified", ) unless @ARGV; make_pattern( shift(@ARGV) ); pod2usage(-exitval => 1, -verbose => 0, -message => "No tar files specified", ) unless @ARGV; process_archive($_) foreach @ARGV; exit 0; sub make_pattern { my($pat) = @_; if($opt{'ignore-case'}) { $pattern = qr{(?im)$pat}; } else { $pattern = qr{(?m)$pat}; } } sub process_archive { my($filename) = @_; _log("Processing archive: $filename"); my $next = Archive::Tar->iter($filename); while( my $f = $next->() ) { next unless $f->is_file; match_file($f) if $f->size > 0; } } sub match_file { my($f) = @_; my $path = $f->name; my $prefix = $f->prefix; if (defined $prefix) { $path = File::Spec->catfile($prefix, $path); } _log("filename: %s (%d bytes)", $path, $f->size); my $body = $f->get_content(); if($body !~ $pattern) { _log(" no match"); return; } if($opt{'list-only'}) { print $path, "\n"; return; } save_file($path, $body); } sub save_file { my($path, $body) = @_; _log(" found match - extracting"); my($fh); my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; if($dir and not $opt{basename}) { _log(" writing to $dir/$file"); $dir =~ s{\A/}{./}; mkpath($dir) unless -d $dir; open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; } else { _log(" writing to ./$file"); open $fh, '>', $file or die "open($file): $!"; } print $fh $body; close($fh); } sub _log { return unless $opt{verbose}; my($format, @args) = @_; warn sprintf($format, @args) . "\n"; } __END__ =head1 NAME ptargrep - Apply pattern matching to the contents of files in a tar archive =head1 SYNOPSIS ptargrep [options] ... Options: --basename|-b ignore directory paths from archive --ignore-case|-i do case-insensitive pattern matching --list-only|-l list matching filenames rather than extracting matches --verbose|-v write debugging message to STDERR --help|-? detailed help message =head1 DESCRIPTION This utility allows you to apply pattern matching to B of files contained in a tar archive. You might use this to identify all files in an archive which contain lines matching the specified pattern and either print out the pathnames or extract the files. The pattern will be used as a Perl regular expression (as opposed to a simple grep regex). Multiple tar archive filenames can be specified - they will each be processed in turn. =head1 OPTIONS =over 4 =item B<--basename> (alias -b) When matching files are extracted, ignore the directory path from the archive and write to the current directory using the basename of the file from the archive. Beware: if two matching files in the archive have the same basename, the second file extracted will overwrite the first. =item B<--ignore-case> (alias -i) Make pattern matching case-insensitive. =item B<--list-only> (alias -l) Print the pathname of each matching file from the archive to STDOUT. Without this option, the default behaviour is to extract each matching file. =item B<--verbose> (alias -v) Log debugging info to STDERR. =item B<--help> (alias -?) Display this documentation. =back =head1 COPYRIGHT Copyright 2010 Grant McLean Egrantm@cpan.orgE This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut