3 # Script to extract files from Debian packages
4 # Copyright 2004-2007 Frank Lichtenheld <frank@lichtenheld.de>
6 # based on a shell script which was
7 # Copyright 2003 Noel Köthe
8 # Copyright 2004 Martin Schulze <joey@debian.org>
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with this program; if not, write to the Free Software
22 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 use lib "$FindBin::Bin/../lib";
29 use lib "$FindBin::Bin";
32 use File::Temp qw( tempdir );
38 use Parse::DebControl;
39 use Parse::DebianChangelog;
41 use Fcntl qw(:DEFAULT :flock);
45 use constant PKGPOOL => 1;
46 use constant DIGESTPOOL => 2;
48 my $PROGNAME = 'extract_files';
52 my $directory = cwd()."/pool";
55 my $target = cwd()."/extracted_files";
57 my $configdir = cwd()."/etc";
58 my ( $verbose, $version, $help, $debug, $force, $use_dump );
61 'verbose|v' => \$verbose,
63 'directory|d=s' => \$directory,
64 'config|c=s' => \$configdir,
65 'target|t=s' => \$target,
66 'workdir|w=s' => \$workdir,
67 'cachefile=s' => \$cachefile,
68 'dumpfile=s' => \$dumpfile,
69 'use_dump' => \$use_dump,
70 'version' => \$version,
75 my (%src_packages, %bin_packages, %cache);
80 already_extracted => 0,
85 Getopt::Long::config('no_getopt_compat', 'no_auto_abbrev');
87 GetOptions(%opthash) or do_error( "couldn't parse commandline parameters" );
90 $directory =~ s,/+$,,o;
92 $TEMPDIR = tempdir( 'pdo_extract_file.XXXXXX',
93 DIR => $workdir, CLEANUP => 1 );
95 $TEMPDIR = tempdir( 'pdo_extract_file.XXXXXX',
99 ##################################################
103 die "$PROGNAME: FATAL: @_\n";
108 warn "$PROGNAME: WARNING: @_\n";
109 if (++$no_warnings > $MAXWARN) {
110 do_error( "too many warnings ($MAXWARN)" );
116 print "$PROGNAME: INFO: @_\n";
122 print "$PROGNAME: DEBUG: @_\n";
130 $$log .= localtime().": @_\n";
134 my $filename = shift;
135 sysopen(H, $filename, O_WRONLY|O_NONBLOCK|O_CREAT) or return undef;
140 ##################################################
141 # PACKAGE HANDLING (UNPACKING/CLEANUP)
144 my ( $pkgname, $dscname, $log ) = @_;
146 chdir( $TEMPDIR ) or do_error( "couldn't change working directory to $TEMPDIR" );
148 add_log( $log, "dpkg-source -sn -x $dscname $pkgname+source" );
150 system("dpkg-source", "-sn", "-x", $dscname, "$pkgname+source" ) == 0
152 do_warning( "couldn't unpack $dscname: $!" );
153 add_log( $log, "couldn't unpack $dscname: $!" );
157 return "$pkgname+source";
161 my ( $pkgname, $debname, $log ) = @_;
163 add_log( $log, "unpacking binary package $pkgname" );
165 mkdir( "$TEMPDIR/$pkgname" ) or do {
166 do_warning( "couldn't create directory $TEMPDIR/$pkgname" );
167 add_log( $log, "couldn't create directory $TEMPDIR/$pkgname" );
171 add_log( $log, "dpkg-deb --extract $debname $TEMPDIR/$pkgname" );
173 system("dpkg-deb", "--extract", $debname, "$TEMPDIR/$pkgname" ) == 0
175 do_warning( "couldn't unpack $debname" );
176 add_log( $log, "couldn't unpack $debname" );
183 sub unpack_allbinpkg {
184 my ($pkg_data, $log) = @_;
188 foreach my $pkg (@{$pkg_data->{bins}}) {
189 next if $already_seen{$pkg->{bin_name}}; # some assumptions about sane version numbers included
191 unpack_binpkg($pkg->{bin_name}, $pkg->{deb}, $log );
193 $already_seen{$pkg->{bin_name}}++;
200 foreach my $pkg (keys %{$pkg_data->{bin_list}}) {
201 # rmtree should do that itself, but there seems to be a bug somewhere
202 system( "chmod", "-R", "u+rwx", "$TEMPDIR/$pkg" );
203 rmtree( "$TEMPDIR/$pkg" );
207 ##################################################
210 sub pkg_pool_directory {
213 my $name = $pkg_data->{src_name};
214 my $version = $pkg_data->{src_version};
215 my $dscname = $pkg_data->{dsc};
219 # I would prefer $name_$version but lets be backward compatible
220 # in case someone depends on the naming
221 if ($name =~ /^(lib.)/o) {
222 # $dir .= "$1/$name/${name}_$version";
223 $dir .= "$1/$name/".basename($dscname, '.dsc');
225 # $dir .= substr($name,0,1)."/$name/${name}_$version";
226 $dir .= substr($name,0,1)."/$name/".basename($dscname, '.dsc');
233 my ($pkg_data, $config_data, $log) = @_;
235 if ($config_data->{structure} == PKGPOOL) {
236 my $dir = "$target/".pkg_pool_directory( $pkg_data );
237 if (!$force && -f "$dir/log") {
238 (utime(undef,undef,"$dir/log") == 1)
239 or do_warning( "touch of $dir/log failed" );
245 } elsif ($config_data->{structure} == DIGESTPOOL) {
246 die "UNIMPLEMENTED!";
248 do_error( "unknown pool structure $config_data->{structure}" );
253 my ($dir, $log) = @_;
255 open my $logfh, ">$dir/log" or do_error( "couldn't open log file $dir/log.\n$log" );
256 flock $logfh, LOCK_EX or do_error( "couldn't lock log file $dir/log" );;
260 close $logfh or do_warning( "couldn't close log file $dir/log" );
263 ##################################################
266 sub extract_copyright_to_pkgpool {
267 my ($pkg_data, $config_data, $log, $source_dir, $target_dir) = @_;
269 add_log( $log, "copy copyright file from source package" );
271 my $src_tgt = "$target_dir/copyright";
272 copy( "$source_dir/debian/copyright", $src_tgt )
273 or add_log( $log, "seems to have failed: $!" );
275 foreach my $bin_pkg (keys %{$pkg_data->{bin_list}}) {
277 my $usd = "$TEMPDIR/$bin_pkg/usr/share/doc/$bin_pkg";
278 my $cpy = "$usd/copyright";
279 my $tgt = "$target_dir/$bin_pkg.copyright";
282 add_log( $log, "copy copyright file from binary package $bin_pkg" );
284 or add_log( $log, "seems to have failed: $!" );
286 add_log( $log, "copyright file $cpy is symlink, I can't handle that" );
288 add_log( $log, "doc directory $usd is symlink" );
289 my $link = readlink($usd) or add_log( $log, "readlink $usd failed" );
290 if ($link && $link =~ m,^(?:\./)?(\S+)/?$,o) { # do a little sanity check
292 if ($pkg_data->{bin_list}{$pkg2}) {
293 add_log( $log, "symlink points to $pkg2, make symlink to copyright file" );
294 (symlink( "$pkg2.copyright", $tgt ) == 1 )
295 or add_log( $log, "symlink creation failed" );
297 add_log( $log, "symlink points to $pkg2, don't know what to do with that" );
300 add_log( $log, "link seems fishy, not using" );
304 unless (-e $tgt || -l $tgt) { # if it is a link, we can't be sure that the target package was already processed
305 add_log( $log, "copyright file $tgt still doesn't exist" );
307 add_log( $log, "copyright file of the source package exists, make symlink" );
308 (symlink( "copyright", $tgt ) == 1 )
309 or add_log( $log, "symlink generation failed" );
311 add_log( $log, "give up on $bin_pkg" );
313 or add_log( $log, "even the touch of $tgt.ERROR failed :(" );
319 unless (-e $src_tgt) {
320 add_log( $log, "copyright file $src_tgt still doesn't exist" );
321 # take one of the binary packages, prefering one that has
322 # the same name as the source package
323 foreach my $bin_pkg (($pkg_data->{src_name},
324 keys %{$pkg_data->{bin_list}})) {
325 if (-e "$target_dir/$bin_pkg.copyright") {
326 add_log( $log, "copyright file $target_dir/$bin_pkg.copyright seems like a good guess to me, make a symlink" );
327 (symlink( "$bin_pkg.copyright", $src_tgt ) == 1 )
329 add_log( $log, "symlink generation failed" );
335 unless (-e $src_tgt) {
336 add_log( $log, "give up" );
337 touch("$src_tgt.ERROR") or
338 add_log( $log, "even the touch of $src_tgt.ERROR failed :(" );
343 sub extract_changelog_to_pkgpool {
344 my ($pkg_data, $config_data, $log, $source_dir, $target_dir) = @_;
346 add_log( $log, "copy changelog file from source package" );
348 my $src_changelog = copy( "$source_dir/debian/changelog",
349 "$target_dir/changelog.txt" );
351 if ($src_changelog) {
352 add_log( $log, "changelog file sucessfully copied" );
354 add_log( $log, "seems to have failed: $!" );
357 add_log( $log, "create enhanced HTML version" );
358 my $chg = Parse::DebianChangelog->init;
359 my $parsed = $chg->parse( { infile => "$source_dir/debian/changelog" } );
361 $chg->html( { outfile => "$target_dir/changelog.html",
362 template => "$configdir/tmpl/default.tmpl" } );
363 add_log( $log, scalar $chg->get_parse_errors )
364 if $chg->get_parse_errors;
366 do_warning( $chg->get_error );
367 add_log( $log, $chg->get_error );
371 sub manage_current_link {
372 my ($pkg_data, $config_data, $log, $source_dir, $target_dir) = @_;
374 my $parent_dir = dirname($target_dir);
375 my $dirname = basename($target_dir);
376 my $current_link = "$parent_dir/current";
377 add_log( $log, "parent_dir=$parent_dir; dirname=$dirname" );
378 unless (-l $current_link) {
379 add_log( $log, "create new current link" );
380 (chdir( $parent_dir ) and
381 (symlink( $dirname, 'current' ) == 1 )) or
382 add_log( $log, "creating new current link failed: $!" );
384 my $old_target = readlink( $current_link );
385 (my $old_version = $old_target) =~ s/^[^_]*_//o;
386 if (version_cmp( $pkg_data->{src_version},
389 "old_version=$old_version; overwriting current link" );
390 (chdir( $parent_dir ) and
391 unlink( 'current' ) and
392 (symlink( $dirname, 'current' ) == 1 )) or
393 add_log( $log, "overwriting current link failed: $!" );
396 "old_version=$old_version; not touching current link" );
402 my ($pkg_data, $config_data) = @_;
405 add_log( \$log, "process source package $pkg_data->{src_name} ($pkg_data->{src_version})" );
407 unless (to_update( $pkg_data, $config_data, \$log )) {
408 $stats{already_extracted}++;
409 do_debug( "source package $pkg_data->{src_name} ($pkg_data->{src_version}) doesn't need extracting" );
413 if (my $source_dir = unpack_srcpkg( $pkg_data->{src_name}, $pkg_data->{dsc}, \$log )) {
415 $source_dir = "$TEMPDIR/$source_dir";
417 unpack_allbinpkg($pkg_data, \$log);
419 my $target_dir = "$target/".pkg_pool_directory($pkg_data);
420 add_log( \$log, "source_dir=$source_dir; target_dir=$target_dir" );
422 mkpath( $target_dir );
424 if ($config_data->{structure} == PKGPOOL) {
425 extract_copyright_to_pkgpool( $pkg_data, $config_data, \$log,
426 $source_dir, $target_dir );
427 extract_changelog_to_pkgpool( $pkg_data, $config_data, \$log,
428 $source_dir, $target_dir );
429 manage_current_link( $pkg_data, $config_data, \$log,
430 $source_dir, $target_dir );
431 } elsif ($config_data->{structure} == DIGESTPOOL) {
432 die "UNIMPLEMENTED!";
434 do_error( "unknown pool structure $config_data->{structure}" );
437 # rmtree should do that itself, but there seems to be a bug somewhere
438 system( "chmod", "-R", "u+rwx", "$source_dir" );
439 rmtree( $source_dir );
440 cleanup_binpkg($pkg_data);
441 write_log( $target_dir, $log );
445 sub extract_from_all {
446 my ( $src_packages ) = @_;
448 unless (-d $target) {
449 mkpath( $target ) or do_error( "couldn't create target directory" );
452 # TODO: make configurable
454 structure => PKGPOOL,
457 do_info( scalar(keys(%$src_packages))." source packages to process" );
458 foreach my $p (keys %$src_packages) {
459 foreach my $v (keys %{$src_packages->{$p}}) {
460 extract_files( $src_packages->{$p}{$v}, \%config );
465 ##################################################
466 # COLLECTING INFORMATION
468 sub merge_src_bin_packages {
469 my ( $src_packages, $bin_packages ) = @_;
471 foreach my $p (keys %$bin_packages) { # packages
472 foreach my $v (keys %{$bin_packages->{$p}}) { # versions
473 foreach my $a (keys %{$bin_packages->{$p}{$v}}) { # architectures
474 my %bin_data = %{$bin_packages->{$p}{$v}{$a}};
476 if (exists $src_packages->{$bin_data{bin_src}}{$bin_data{bin_src_version}}) {
477 $src_packages->{$bin_data{bin_src}}{$bin_data{bin_src_version}}{bins} ||= [];
478 push @{$src_packages->{$bin_data{bin_src}}{$bin_data{bin_src_version}}{bins}}, \%bin_data;
479 $src_packages->{$bin_data{bin_src}}{$bin_data{bin_src_version}}{bin_list}{$p}++;
485 return $src_packages;
489 my ( $dscname ) = @_;
491 my $parser = Parse::DebControl->new();
492 my ( $raw_data, $pkg_data );
494 my $dsccontent = $cache{$dscname};
495 unless ($dsccontent) {
496 open my $dscfh, "<", $dscname or do {
497 do_warning( "reading file $dscname failed" );
504 if (/^-----BEGIN PGP SIGNED MESSAGE/o) {
505 while (<$dscfh> =~ /\S/) {}; # skip Hash: line and similar
508 if (/^-----BEGIN PGP SIGNATURE/o) {
514 $cache{$dscname} = $dsccontent;
518 (my $begin = substr($dsccontent,0,20)) =~ s/\n/\\n/go;
519 do_debug( "CACHE HIT: $dscname ($begin)" );
523 unless ( $raw_data = $parser->parse_mem( $dsccontent,
524 { discardCase => 1 } ) ) {
525 do_warning( "parsing file $dscname failed.\n$dsccontent" );
529 my $no_chunks = @$raw_data;
530 if ($no_chunks != 1) {
531 do_warning( "expected exactly one chunk in .dsc file, got $no_chunks" );
536 src_name => $raw_data->[0]{source},
537 src_version => $raw_data->[0]{version},
541 unless( $pkg_data->{src_name} && defined($pkg_data->{src_version})
542 && $pkg_data->{dsc} ) {
544 do_error( "something fishy happened.\n", Dumper( $pkg_data ) );
547 do_debug( "found source package $pkg_data->{src_name}, version $pkg_data->{src_version}" );
554 my ( $debname ) = @_;
556 my $parser = Parse::DebControl->new();
557 my ( $raw_data, $pkg_data );
559 if ($cache{$debname}) {
562 (my $begin = substr($cache{$debname},0,20)) =~ s/\n/\\n/go;
563 do_debug( "CACHE HIT: $debname ($begin)" );
566 $cache{$debname} ||= qx/dpkg-deb --info "$debname" control/;
567 unless ( $cache{$debname} ) {
568 do_warning( "extracting control information of file $debname failed" );
571 my $control = $cache{$debname};
573 unless ( $raw_data = $parser->parse_mem( $control,
574 { discardCase => 1 } ) ) {
575 do_warning( "parsing control information <<$control>> of file $debname failed" );
579 my $no_chunks = @$raw_data;
580 if ($no_chunks != 1) {
581 do_warning( "expected exactly one chunk in .deb control information, got $no_chunks" );
586 bin_name => $raw_data->[0]{package},
587 bin_version => $raw_data->[0]{version},
588 bin_arch => $raw_data->[0]{architecture},
589 bin_src => $raw_data->[0]{source} || $raw_data->[0]{package},,
590 bin_src_version => $raw_data->[0]{version},
594 if ($pkg_data->{bin_src} =~ /^([\w.+-]+)\s*\(\s*=\s*([^\s\)])\s*\)\s*$/) {
595 $pkg_data->{bin_src} = $1;
596 $pkg_data->{bin_src_version} = $2;
599 do_debug( "found binary package $pkg_data->{bin_name}, version $pkg_data->{bin_version}, architecture $pkg_data->{bin_arch}" );
606 my ( $debname ) = @_;
608 do_debug( "processing deb file $debname" );
610 my $pkg_data = read_deb( $debname );
611 return unless $pkg_data;
613 if (exists $bin_packages{$pkg_data->{bin_name}}{$pkg_data->{bin_version}}{$pkg_data->{bin_arch}}) {
614 do_warning( "duplicated package $pkg_data->{bin_name}, version $pkg_data->{bin_version}, arch $pkg_data->{bin_arch}" );
617 $bin_packages{$pkg_data->{bin_name}}{$pkg_data->{bin_version}}{$pkg_data->{bin_arch}} = $pkg_data;
622 my ( $dscname ) = @_;
624 do_debug( "processing dsc file $dscname" );
626 my $pkg_data = read_dsc( $dscname );
627 return unless $pkg_data;
629 if (exists $src_packages{$pkg_data->{src_name}}{$pkg_data->{src_version}}) {
630 do_warning( "duplicated package $pkg_data->{src_name}, version {$pkg_data->{src_version}" );
633 $src_packages{$pkg_data->{src_name}}{$pkg_data->{src_version}} = $pkg_data;
640 do_debug( "processing directory $dir" );
642 opendir my $dh, $dir or do_error( "couldn't open directory $dir" );
643 while( my $entry = readdir $dh ) {
645 next if $entry =~ /^\.\.?$/o;
647 my $fullname = "$dir/$entry";
649 read_sub( $fullname ) if -d $fullname;
650 collect_dsc( $fullname ) if -f _ && ( $fullname =~ /\.dsc$/o );
651 collect_deb( $fullname ) if -f _ && ( $fullname =~ /\..?deb$/o );
653 closedir $dh or do_warning( "couldn't close directory $dir" );
656 ##################################################
659 do_info( "Using working directory $TEMPDIR" );
661 do_info( "load information from dump file" );
662 open DUMP, '<', $dumpfile
663 or do_error( "couldn't open dump file $dumpfile: $!" );
664 my $info = join "", <DUMP>;
666 close DUMP or do_warning( "couldn't close dump file: $!" );
668 do_info( "collect information (in $directory)" );
670 tie %cache, 'DB_File', $cachefile, O_CREAT|O_RDWR, 0640
671 or die "E: tie with file $cachefile failed: $!";
673 read_sub( $directory );
674 #FIXME: "untie attempted while 1 inner references still exist"
675 # untie %cache if tied %cache;
676 do_info( "postprocess collected information" );
677 merge_src_bin_packages( \%src_packages, \%bin_packages );
679 do_info( "dump backup of collected information" );
680 open DUMP, '>', $dumpfile
681 or do_error( "couldn't open dump file $dumpfile: $!" );
682 print DUMP Data::Dumper->Dump( [ \%src_packages ],
683 [ '*src_packages' ] );
684 close DUMP or do_warning( "couldn't close dump file: $!" );
687 do_info( "begin extracting files" );
688 extract_from_all( \%src_packages );
691 Source Packages: $stats{src_pkgs}
692 Cached Info: $stats{src_cache}
693 Already Extracted: $stats{already_extracted}
694 Binary Packages: $stats{bin_pkgs}
695 Cached Info: $stats{bin_cache}