From 5e83da98cf5bd0100a850e32ede0e0b702044d7c Mon Sep 17 00:00:00 2001 From: Michael Casadevall Date: Sat, 3 Jan 2009 16:52:53 -0500 Subject: [PATCH] Added content import, merged master, added update2 script, added new quotes file, and added commands to dak.py Signed-off-by: Michael Casadevall --- dak/.generate_contents.py.swp | Bin 12288 -> 0 bytes dak/dak.py | 2 + dak/dakdb/update2.py | 80 ++++++++++++++++ dak/generate_contents.py | 2 +- dak/import_contents.py | 171 ++++++++++++++++++++++++++++++++++ dak/update_db.py | 2 +- daklib/database.py | 23 ++++- docs/README.quotes | 6 ++ 8 files changed, 281 insertions(+), 5 deletions(-) delete mode 100644 dak/.generate_contents.py.swp create mode 100644 dak/dakdb/update2.py create mode 100755 dak/import_contents.py diff --git a/dak/.generate_contents.py.swp b/dak/.generate_contents.py.swp deleted file mode 100644 index d4e83290431f9beed6b625eb22bb28b4ce8ee265..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2TW=dh6vwA6O({@7v@g8CsY4YTtsN%;+B!{z6Pk;ZTYaHQqsWZyu{~sO+MSK- zmm*Ll;1#J*iOV;j2noRhFGvUwyz?1=zJLV0BL1^pCvMuLBBT$McBP-!yEAiU&VSCC zag?cMU!7Z^)0q*5>q*8^&gHqU+5@b5g0VVZ*KhB6pn$61MU)Zs#60;^J6=Sb;q#5U57_%(3#==tx$#9!#91<4=|MU?^@9D-bIX zD-bIXD-bIXD-bIXD-bL2pemr;1MCe9{$S+5N_0Q4?H)bGw^)H#fmnf9fmnf9fmnf9 zfmnf9fmnf9fmnf9fd^0lUT5scA;#W*2!qG}|9gM`-+F?vpTLjcE$}ADfj=K->@)B# zcn3^_!{EDPjJ*%8gNxue_~9|ez6Sw#2^CT^AAxH?0Uw+M1df0Ka1$DT3f=>u z-Vb2B#6$jX3efh;;?hp0m~2p`VslGWDl{=cLzRjys8oh>_}~0)m$vtRZG`r>4{Sf5 zt-ToA7q51cZpSjDDXu$CBPCPREpa{Pbx~83G(m1NgTqQTo|02`V>R*0WG=UeX-upc zwE1Gz@s)ld?Y>&ej)#}2RM({LFz|U(AQtp@#KV%Sq+|Q5iCJN~i8Ta*E{NWuYnaaz zgKEhWBAJ+pj!vbgqN7u}TvRjBHRuOY2_Mb4t19k#GqcP+Urafc#qIjMw1pnOk@uQ` zrB7ZNe#g&nues4TKHYuj)6Sjca;N+RouZ^nt)Z_(g6T&3fUu#T1Z90JwtR{zALq@lSv5QK%T&Qx&=&B4i(Kt}j z^fOJN^ntVnW+pW61PWzZA79n+5WZ}hPL-S5J%f7TVURAZi$<8Bl!Pa{=kksLxpP6- z`PRoE%?O2VJ?hT4L6}`qEkWF?wGaU{6?nq$Ix^Vz5@WA&U;3e`LAoT!76L6DYCGO~ zXazcimU@Tty0%>pQB6{J3T)!!3a@8>Y2CQ33FZ&;XmK6bBz--ddTe#BIC>YMR(DCe z`=MLtsGdY|MXIF!z$Dj2iY20;O}7k6r*){&_RDWMZ44X^gvDB7ZJ_IQFFB}}iPVrL zB-ci|Vxfn!PW2odU*BA2(mh>66~eWEw#GG)y)V(j@~-N}`l4eC)(!3OYcGfq>fv)= zt^E&T*6drKIQk>-1iR3Cf}x(Zbey#M~kQf$O-!9z`Gu z%hgk1kgvGHv$XKtZQh5Z+<%1z>D&y*KGSHNC#OL*q_{5-gIpm&k$_NV0vXtrCz z6V%?y;a*PZY_lkrV*Tb?d_&qzvK+M0`I)(`v@KN&No|XEFH&{xVhDavYjs2W_6F!e z8}h@Pn1%iG>cFrzf>1UA+c4A!7IyOuHN#j{jG z2$+JF^&ku?>MfDR#7xF~e(UFmn^vGEDG*P*jhb?Y85GYh4%zETT)8p27 zw5b`?_OvDVsaLaUY{*n}8mi4bfz~TdKv*#Xvrdp&K!lo*8(3U9bZ|RbOWkZ{v5duA zAAmM#IjEYlM)R^JY%Gf~vU6#;Glr8_7mDi*3)L4c6sBhLi>3VJ-27Z=g%B9ibEU-# z#Uf2F6(~>3`9f)Ksyv@B&~mx3yi~l9Au3|v7z)}~v6x?)E6pyIOO#(+p-cHf0p?c5 zLlyKgf%Dzn#T(gYN_YcrJ#H)Q4-Ay_>KN&ygP|^cf*%FKxb}r=+{XNGiJ`m3_A!k4 zv7EYW;JEuBHV(Jr$!1GYaw + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +# really, if we want to screw ourselves, let's find a better way. +# rm -rf /srv/ftp.debian.org + +################################################################################ + +import psycopg2, time + +################################################################################ + +def do_update(self): + print "Adding content fields to database" + + try: + c = self.db.cursor() + c.execute("""CREATE TABLE content_file_paths ( + id serial primary key not null, + path text unique not null + )""") + + c.execute("""CREATE TABLE content_file_names ( + id serial primary key not null, + file text unique not null + )""") + + c.execute("""CREATE TABLE content_associations ( + id serial not null, + binary_pkg int4 not null references binaries(id) on delete cascade, + filepath int4 not null references content_file_paths(id) on delete cascade, + filename int4 not null references content_file_names(id) on delete cascade + );""") + + c.execute("""CREATE FUNCTION comma_concat(text, text) RETURNS text + AS $_$select case + WHEN $2 is null or $2 = '' THEN $1 + WHEN $1 is null or $1 = '' THEN $2 + ELSE $1 || ',' || $2 + END$_$ + LANGUAGE sql""") + + c.execute("""CREATE AGGREGATE comma_separated_list ( + BASETYPE = text, + SFUNC = comma_concat, + STYPE = text, + INITCOND = '' + );""") + + c.execute("UPDATE config SET value = '2' WHERE name = 'db_revision'") + self.db.commit() + + print "REMINDER: Remember to fully regenerate the Contents files before running import-contents" + print "" + print "Pausing for five seconds ..." + time.sleep (5) + + except psycopg2.ProgrammingError, msg: + self.db.rollback() + print "FATAL: Unable to apply content table update 2!" + print "Error Message: " + str(msg) + print "Database changes have been rolled back." diff --git a/dak/generate_contents.py b/dak/generate_contents.py index 54b70bde..6d84d16b 100755 --- a/dak/generate_contents.py +++ b/dak/generate_contents.py @@ -66,7 +66,7 @@ def generate_contents(suites): h.close() # Get our suites, and the architectures - for s in suites: + for s in [i.lower() for i in suites]: suite_id = database.get_suite_id(s) q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id) diff --git a/dak/import_contents.py b/dak/import_contents.py new file mode 100755 index 00000000..945b9ea6 --- /dev/null +++ b/dak/import_contents.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python +# Import contents files + +# Copyright (C) 2008, 2009 Michael Casadevall + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ +################################################################################ + +################################################################################ + +import sys, os, popen2, tempfile, stat, time, pg +import re, gzip, apt_pkg +from daklib import database, utils +from daklib.dak_exceptions import * + +################################################################################ + +Cnf = None +projectB = None +out = None +AptCnf = None + +################################################################################ + +def usage (exit_code=0): + print """Usage: dak import-contents +Import Contents files + + -h, --help show this help and exit + -s, --suite=SUITE only write file lists for this suite +""" + sys.exit(exit_code) + +################################################################################ + +def import_contents(suites): + global projectB, Cnf + + # Start transaction + projectB.query("BEGIN WORK") + + # Needed to make sure postgreSQL doesn't freak out on some of the data + projectB.query("SET CLIENT_ENCODING TO 'LATIN1'") + + # Get our suites, and the architectures + for s in suites: + suite_id = database.get_suite_id(s) + + q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id) + + arch_list = [ ] + for r in q.getresult(): + if r[1] != "source" and r[1] != "all": + arch_list.append((r[0], r[1])) + + arch_all_id = database.get_architecture_id("all") + + for arch in arch_list: + print "Processing %s/%s" % (s, arch[1]) + arch_id = database.get_architecture_id(arch[1]) + f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-%s.gz" % (s, arch[1]), "r") + + # Get line count + lines = f.readlines() + num_of_lines = len(lines) + + # Ok, the file cursor is at the first entry, now comes the fun 'lets parse' bit + lines_processed = 0 + found_header = False + + for line in lines: + if found_header == False: + if not line: + print "Unable to find end of Contents-%s.gz header!" % ( arch[1]) + sys.exit(255) + + lines_processed += 1 + p = re.compile('^FILE') + if p.match(line): + found_header = True + continue + + # The format is simple enough, *filename*, *section/package1,section/package2,etc* + # Each file appears once per Contents file, so first, use some regex match + # to split the two bits + + # Print out progress bar + print "\rProcessed %d lines of %d (%%%.2f)" % (lines_processed, num_of_lines, (float(lines_processed)/num_of_lines)), + + # regex lifted from packages.d.o code + p = re.compile('^(.+?)\s+(\S+)$') + matchs = p.findall(line) + filename = matchs[0][0] + packages = matchs[0][1].split(',') + + # Iterate through each file's packages + for package in packages: + p = re.compile('(\S+)/(\S+)$') + matchs = p.findall(package) + + # Needed since the DB is unicode, and these files + # are ASCII + section_name = matchs[0][0] + package_name = matchs[0][1] + + section_id = database.get_section_id(section_name) + package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_id) + + if package_id == None: + # Likely got an arch all package + package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_all_id) + + database.insert_content_path(package_id, filename) + + lines_processed += 1 + f.close() + + # Commit work + print "Committing to database ..." + projectB.query("COMMIT") + +################################################################################ + +def main (): + global Cnf, projectB, out + out = sys.stdout + + Cnf = utils.get_conf() + + Arguments = [('h',"help","Import-Contents::Options::Help"), + ('s',"suite","Import-Contents::Options::Suite","HasArg"), + ] + + for i in [ "help", "suite" ]: + if not Cnf.has_key("Import-Contents::Options::%s" % (i)): + Cnf["Import-Contents::Options::%s" % (i)] = "" + + suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv) + Options = Cnf.SubTree("Import-Contents::Options") + + if Options["Help"]: + usage() + + if Options["Suite"]: + suites = utils.split_args(Options["Suite"]) + else: + suites = Cnf.SubTree("Suite").List() + + projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"])) + database.init(Cnf, projectB) + + import_contents(suites) + +####################################################################################### + +if __name__ == '__main__': + main() diff --git a/dak/update_db.py b/dak/update_db.py index e59a558c..7d89e6bf 100755 --- a/dak/update_db.py +++ b/dak/update_db.py @@ -36,7 +36,7 @@ from daklib import utils Cnf = None projectB = None -required_database_schema = 1 +required_database_schema = 2 ################################################################################ diff --git a/daklib/database.py b/daklib/database.py index 1f659606..c39c83b1 100755 --- a/daklib/database.py +++ b/daklib/database.py @@ -45,6 +45,7 @@ suite_version_cache = {} suite_bin_version_cache = {} content_path_id_cache = {} content_file_id_cache = {} +insert_contents_file_cache = {} ################################################################################ @@ -250,14 +251,14 @@ def get_suite_version(source, suite, arch): return version -def get_latest_binary_version_id(binary, suite, arch): +def get_latest_binary_version_id(binary, section, suite, arch): global suite_bin_version_cache - cache_key = "%s_%s" % (binary, suite) + cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch) if suite_bin_version_cache.has_key(cache_key): return suite_bin_version_cache[cache_key] - q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d'" % (binary, int(arch), int(suite))) + q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section))) highest_bid, highest_version = None, None @@ -266,6 +267,7 @@ def get_latest_binary_version_id(binary, suite, arch): highest_bid = bi[0] highest_version = bi[1] + suite_bin_version_cache[cache_key] = highest_bid return highest_bid ################################################################################ @@ -459,6 +461,14 @@ def get_or_set_contents_path_id(path): ################################################################################ def insert_content_path(bin_id, fullpath): + global insert_contents_file_cache + cache_key = "%s_%s" % (bin_id, fullpath) + + # have we seen this contents before? + # probably only revelant during package import + if insert_contents_file_cache.has_key(cache_key): + return + # split the path into basename, and pathname (path, file) = os.path.split(fullpath) @@ -466,6 +476,13 @@ def insert_content_path(bin_id, fullpath): file_id = get_or_set_contents_file_id(file) path_id = get_or_set_contents_path_id(path) + # Determine if we're inserting a duplicate row + q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id)) + if q.getresult(): + # Yes we are, return without doing the insert + print "Inserting dup row" + return + # Put them into content_assiocations projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id)) return diff --git a/docs/README.quotes b/docs/README.quotes index 3568ae7a..c696fbeb 100644 --- a/docs/README.quotes +++ b/docs/README.quotes @@ -344,3 +344,9 @@ Canadians: This is a lighthouse. Your call. elmo: I can't believe people pay you to fix computers %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +* Ganneff ponders how to best write the text to -devel. (need to tell em in + case they find more bugs). "We fixed the fucking idiotic broken implementation + to be less so" is probably not the nicest, even if perfect valid, way to say so + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -- 2.39.5