]> git.deb.at Git - deb/packages.git/blob - bin/parse-packages
Parse scripts for Packages.gz files and Sources.gz files, to generate database
[deb/packages.git] / bin / parse-packages
1 #!/usr/bin/perl -w
2 # Convert Packages.gz files into Sleepycat db files for efficient usage of
3 # data
4 #
5 # $Id$
6 #
7 # Copyright (C) 2006  Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 # GNU General Public License for more details.
17
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21
22 use strict;
23
24 use DB_File;
# In-memory tables filled while the Packages files are read and dumped to
# DB files afterwards:
#   %packages_small         package name -> \000-joined
#                           "suite arch section priority version short-descr"
#   %sources_packages       source name  -> \000-joined "package arch version"
#   %descriptions           description text -> numeric description id
#   %packages_descriptions  "package version arch" -> description id
#   %descriptions_packages  description id -> \000-joined "package version arch"
my (%packages_small, %sources_packages, %descriptions);
my (%packages_descriptions, %descriptions_packages);

# Description id -> description text.  Slot 0 holds a placeholder so that the
# ids handed out for real descriptions start at 1.
my @descriptions = ("we count lines one-based\000");

# Suites to process, in this order.
my @suites = qw(oldstable stable testing unstable experimental);

# Paragraph mode: every read returns one blank-line separated stanza.
$/ = '';
35
# Read the binary Packages.gz files of every suite.  The complete stanza of
# each package goes into a per-suite packages_all_$suite.db.new, keyed by
# "package architecture version"; the cross-suite summaries (%packages_small,
# %sources_packages and the description tables) are accumulated in memory.
# Relies on $/ being "" (paragraph mode) so that one read yields one stanza.
for my $suite (@suites) {

        print "Reading $suite...\n";

        # Always start from an empty database.  tie() with O_CREAT reuses an
        # existing file, so a leftover from an aborted run would make the
        # duplicate check below skip every package it already contains and
        # silently leave them out of all summary tables.
        my $all_db_file = "packages_all_$suite.db.new";
        if (-e $all_db_file) {
                unlink $all_db_file
                        or die "Error removing stale $all_db_file: $!";
        }
        my %packages_all_db;
        tie %packages_all_db, "DB_File", $all_db_file,
                O_RDWR|O_CREAT, 0666, $DB_BTREE
                or die "Error creating DB: $!";

        # Both globs are spelled out instead of using {,debian-installer/}:
        # brace expansion is a bash feature and is not performed when /bin/sh
        # is a plain POSIX shell.  The resulting file order is the same.
        my $dist = "/org/ftp.debian.org/ftp/dists/$suite/non-free";
        open my $pkg_fh, "-|", "zcat $dist/binary-*/Packages.gz"
                . " $dist/debian-installer/binary-*/Packages.gz"
                or die "Error running zcat for $suite: $!";
        while (<$pkg_fh>) {
                next if /^\s*$/;
                my $data = "";
                my %data = ();
                chomp;
                # Glue continuation lines onto their field with a \377 marker
                # so that every field can be matched as a single line.
                s/\n /\377/g;
                # [ \t]* rather than \s* after the colon: \s* could cross the
                # newline of an empty field and swallow the following field.
                while (/^(\S+):[ \t]*(.*)\s*$/mg) {
                        my ($key, $value) = ($1, $2);
                        $value =~ s/\377/\n /g;
                        $data .= "$key: $value\n";
                        $key =~ tr [A-Z] [a-z];
                        $data{$key} = $value;
                }

                # Skip double package
                my $all_key =
                        "$data{'package'} $data{'architecture'} $data{'version'}";
                next if exists($packages_all_db{$all_key});
                $packages_all_db{$all_key} = $data;

                # The source package defaults to the binary package name; a
                # Source field may carry "name (version)", of which only the
                # name is used.
                my $src = $data{'package'};
                if ($data{'source'}) {
                        $src = $data{'source'};
                        if ($src =~ /(\S+) \(\S+\)/) {
                                $src = $1;
                        }
                }

                # Identical description texts share one numeric id.
                my $descr = $data{'description'};
                my $did = undef;
                if (exists($descriptions{$descr})) {
                        $did = $descriptions{$descr};
                } else {
                        $did = 1 + $#descriptions;
                        $descriptions[$did] = $descr;
                        $descriptions{$descr} = $did;
                }
                my $pva =
                        "$data{'package'} $data{'version'} $data{'architecture'}";
                $packages_descriptions{$pva} = $did;
                $descriptions_packages{$did} .= "$pva\000";

                # Short description: first line of the description only.
                my $sdescr = $descr;
                $sdescr =~ s/\n.*//s;
                $packages_small{$data{'package'}} .= "$suite $data{'architecture'} ".
                        "$data{'section'} $data{'priority'} $data{'version'} $sdescr\000";
                $sources_packages{$src} .= "$all_key\000";
        }

        # close() on a piped handle reports a non-zero zcat exit status.  Only
        # warn: a suite lacking one of the two directories still yields the
        # data of the other one.
        close $pkg_fh
                or warn "zcat for $suite "
                        . ($! ? "failed: $!" : "exited with status " . ($? >> 8))
                        . "\n";

        untie %packages_all_db;
}
95
print "Writing databases...\n";

# packages_small.db: package name -> \000-separated list of
# "suite architecture section priority version short-description" records.
# A leftover file from an aborted run is removed first: tie() with O_CREAT
# reuses an existing file, so stale keys would survive into the new database.
my $packages_small_file = "packages_small.db.new";
if (-e $packages_small_file) {
        unlink $packages_small_file
                or die "Error removing stale $packages_small_file: $!";
}
my %packages_small_db;
tie %packages_small_db, "DB_File", $packages_small_file,
        O_RDWR|O_CREAT, 0666, $DB_BTREE
        or die "Error creating DB: $!";
while (my ($k, $v) = each(%packages_small)) {
        # Every record was appended with a trailing \000; drop the final one
        # so that it acts as a separator only.
        $v =~ s/\000\z//;
        $packages_small_db{$k} = $v;
}
untie %packages_small_db;
106
# sources_packages.db: source package name -> \000-separated list of
# "package architecture version" entries built from it.
# A leftover file from an aborted run is removed first: tie() with O_CREAT
# reuses an existing file, so stale keys would survive into the new database.
my $sources_packages_file = "sources_packages.db.new";
if (-e $sources_packages_file) {
        unlink $sources_packages_file
                or die "Error removing stale $sources_packages_file: $!";
}
my %sources_packages_db;
tie %sources_packages_db, "DB_File", $sources_packages_file,
        O_RDWR|O_CREAT, 0666, $DB_BTREE
        or die "Error creating DB: $!";
while (my ($k, $v) = each(%sources_packages)) {
        # Drop the \000 that follows the last entry.
        $v =~ s/\000\z//;
        $sources_packages_db{$k} = $v;
}
untie %sources_packages_db;
116
# packages_descriptions.db: "package version architecture" -> description id.
# A leftover file from an aborted run is removed first: tie() with O_CREAT
# reuses an existing file, so stale keys would survive into the new database.
my $packages_descriptions_file = "packages_descriptions.db.new";
if (-e $packages_descriptions_file) {
        unlink $packages_descriptions_file
                or die "Error removing stale $packages_descriptions_file: $!";
}
my %packages_descriptions_db;
tie %packages_descriptions_db, "DB_File", $packages_descriptions_file,
        O_RDWR|O_CREAT, 0666, $DB_BTREE
        or die "Error creating DB: $!";
# Values are plain ids, so they are copied unchanged.
while (my ($k, $v) = each(%packages_descriptions)) {
        $packages_descriptions_db{$k} = $v;
}
untie %packages_descriptions_db;
125
# descriptions_packages.db: description id -> \000-separated list of
# "package version architecture" entries sharing that description.
# A leftover file from an aborted run is removed first: tie() with O_CREAT
# reuses an existing file, so stale keys would survive into the new database.
my $descriptions_packages_file = "descriptions_packages.db.new";
if (-e $descriptions_packages_file) {
        unlink $descriptions_packages_file
                or die "Error removing stale $descriptions_packages_file: $!";
}
my %descriptions_packages_db;
tie %descriptions_packages_db, "DB_File", $descriptions_packages_file,
        O_RDWR|O_CREAT, 0666, $DB_BTREE
        or die "Error creating DB: $!";
while (my ($k, $v) = each(%descriptions_packages)) {
        # Drop the \000 that follows the last entry.
        $v =~ s/\000\z//;
        $descriptions_packages_db{$k} = $v;
}
untie %descriptions_packages_db;
135
# descriptions.db: description id -> full description text.  The same texts
# are also dumped to a text file, one description per line, with embedded
# newlines replaced by \000.
# A leftover database from an aborted run is removed first: tie() with O_CREAT
# reuses an existing file, so stale keys would survive into the new database.
my $descriptions_file = "descriptions.db.new";
if (-e $descriptions_file) {
        unlink $descriptions_file
                or die "Error removing stale $descriptions_file: $!";
}
my %descriptions_db;
tie %descriptions_db, "DB_File", $descriptions_file,
        O_RDWR|O_CREAT, 0666, $DB_BTREE
        or die "Error creating DB: $!";

# Write to descriptions.txt.new, not descriptions.txt: the file is moved into
# place by the rename below, like every database.  Writing straight to the
# final name exposed a half-written file and left that rename without a source.
open my $descr_fh, ">", "descriptions.txt.new"
        or die "Error creating descriptions textfile: $!";
# Index 0 of @descriptions is only a placeholder; real ids start at 1.
for my $i (1 .. $#descriptions) {
        (my $null_d = $descriptions[$i]) =~ s/\n/\000/g;
        print {$descr_fh} "$null_d\n";
        $descriptions_db{$i} = $descriptions[$i];
}
# Buffered write errors only show up when the handle is closed.
close $descr_fh
        or die "Error writing descriptions textfile: $!";
untie %descriptions_db;

# Everything has been written successfully: move the new files into place,
# in the same order as before, and stop at the first failure instead of
# silently keeping an outdated file.
my @finished_files = (
        "packages_small.db",
        "sources_packages.db",
        (map { "packages_all_$_.db" } @suites),
        "packages_descriptions.db",
        "descriptions_packages.db",
        "descriptions.txt",
        "descriptions.db",
);
for my $file (@finished_files) {
        rename("$file.new", $file)
                or die "Error renaming $file.new to $file: $!";
}