From: Mike O'Connor Date: Wed, 18 Feb 2009 13:51:30 +0000 (-0500) Subject: Contents generation should be working now X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1483a68353fe374254379f4560cc245fbf2523d4;p=dak Contents generation should be working now * rename mhy's daklib/Foo.py to daklib/foo.py * add temporary tables which are populated during p-u * copy from the temporary tables during p-a Signed-off-by: Mike O'Connor --- diff --git a/dak/contents.py b/dak/contents.py index 5bb1b147..2e1d05dc 100644 --- a/dak/contents.py +++ b/dak/contents.py @@ -215,6 +215,7 @@ class Contents(object): WHERE ca.id IS NULL)""" ); cursor.execute( "COMMIT" ) + def bootstrap(self): """ scan the existing debs in the pool to populate the contents database tables diff --git a/dak/dak.py b/dak/dak.py index 638f3dfa..aa6efdee 100755 --- a/dak/dak.py +++ b/dak/dak.py @@ -171,6 +171,8 @@ def init(): "Split queue/done into a date-based hierarchy"), ("stats", "Generate statistics"), + ("calculate-shasums", + "Calculate missing sha1sums and sha256sums"), ("bts-categorize", "Categorize uncategorized bugs filed against ftp.debian.org"), ] diff --git a/dak/dakdb/update4.py b/dak/dakdb/update4.py index f6a2db06..f707a311 100644 --- a/dak/dakdb/update4.py +++ b/dak/dakdb/update4.py @@ -56,6 +56,14 @@ def do_update(self): filename int4 not null references content_file_names(id) on delete cascade );""") + c.execute("""CREATE TABLE temp_content_associations ( + id serial not null, + package text not null, + version debversion not null, + filepath int4 not null references content_file_paths(id) on delete cascade, + filename int4 not null references content_file_names(id) on delete cascade + );""") + c.execute("""CREATE FUNCTION comma_concat(text, text) RETURNS text AS $_$select case WHEN $2 is null or $2 = '' THEN $1 diff --git a/dak/process_accepted.py b/dak/process_accepted.py index 941acd95..e597a8e2 100755 --- a/dak/process_accepted.py +++ b/dak/process_accepted.py @@ -374,12 +374,9 @@ def install (): suite_id = database.get_suite_id(suite) projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id)) - # insert contents into the database - contents = utils.generate_contents_information(file) - q = projectB.query("SELECT currval('binaries_id_seq')") - bin_id = int(q.getresult()[0][0]) - for file in contents: - database.insert_content_path(bin_id, file) + + if not database.copy_temporary_contents(package, version, files[newfile]): + reject("Missing contents for package") # If the .orig.tar.gz is in a legacy directory we need to poolify # it, so that apt-get source (and anything else that goes by the diff --git a/dak/process_unchecked.py b/dak/process_unchecked.py index 1ec04563..5097b24a 100755 --- a/dak/process_unchecked.py +++ b/dak/process_unchecked.py @@ -28,9 +28,11 @@ ################################################################################ -import commands, errno, fcntl, os, re, shutil, stat, sys, time, tempfile, traceback +import commands, errno, fcntl, os, re, shutil, stat, sys, time, tempfile, traceback, tarfile import apt_inst, apt_pkg -from daklib import database +from debian_bundle import deb822 +from daklib.dbconn import DBConn +from daklib.binary import Binary from daklib import logging from daklib import queue from daklib import utils @@ -121,6 +123,16 @@ def reject (str, prefix="Rejected: "): ################################################################################ +def create_tmpdir(): + """ + Create a temporary directory that can be used for unpacking files into for + checking + """ + tmpdir = tempfile.mkdtemp() + return tmpdir + +################################################################################ + def copy_to_holding(filename): global in_holding @@ -322,33 +334,6 @@ def check_distributions(): ################################################################################ -def check_deb_ar(filename): - """ - Sanity check the ar of a .deb, i.e. that there is: - - 1. debian-binary - 2. control.tar.gz - 3. data.tar.gz or data.tar.bz2 - - in that order, and nothing else. - """ - cmd = "ar t %s" % (filename) - (result, output) = commands.getstatusoutput(cmd) - if result != 0: - reject("%s: 'ar t' invocation failed." % (filename)) - reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") - chunks = output.split('\n') - if len(chunks) != 3: - reject("%s: found %d chunks, expected 3." % (filename, len(chunks))) - if chunks[0] != "debian-binary": - reject("%s: first chunk is '%s', expected 'debian-binary'." % (filename, chunks[0])) - if chunks[1] != "control.tar.gz": - reject("%s: second chunk is '%s', expected 'control.tar.gz'." % (filename, chunks[1])) - if chunks[2] not in [ "data.tar.bz2", "data.tar.gz" ]: - reject("%s: third chunk is '%s', expected 'data.tar.gz' or 'data.tar.bz2'." % (filename, chunks[2])) - -################################################################################ - def check_files(): global reprocess @@ -387,6 +372,19 @@ def check_files(): has_binaries = 0 has_source = 0 + cursor = DBConn().cursor() + # Check for packages that have moved from one component to another + # STU: this should probably be changed to not join on architecture, suite tables but instead to used their cached name->id mappings from DBConn + cursor.execute("""PREPARE moved_pkg_q AS + SELECT c.name FROM binaries b, bin_associations ba, suite s, location l, + component c, architecture a, files f + WHERE b.package = $1 AND s.suite_name = $2 + AND (a.arch_string = $3 OR a.arch_string = 'all') + AND ba.bin = b.id AND ba.suite = s.id AND b.architecture = a.id + AND f.location = l.id + AND l.component = c.id + AND b.file = f.id""") + for f in file_keys: # Ensure the file does not already exist in one of the accepted directories for d in [ "Accepted", "Byhand", "New", "ProposedUpdates", "OldProposedUpdates", "Embargoed", "Unembargoed" ]: @@ -549,7 +547,7 @@ def check_files(): # Check the version and for file overwrites reject(Upload.check_binary_against_db(f),"") - check_deb_ar(f) + Binary(f).scan_package() # Checks for a source package... else: @@ -609,7 +607,7 @@ def check_files(): # Validate the component component = files[f]["component"] - component_id = database.get_component_id(component) + component_id = DBConn().get_component_id(component) if component_id == -1: reject("file '%s' has unknown component '%s'." % (f, component)) continue @@ -624,14 +622,14 @@ def check_files(): # Determine the location location = Cnf["Dir::Pool"] - location_id = database.get_location_id (location, component, archive) + location_id = DBConn().get_location_id(location, component, archive) if location_id == -1: reject("[INTERNAL ERROR] couldn't determine location (Component: %s, Archive: %s)" % (component, archive)) files[f]["location id"] = location_id # Check the md5sum & size against existing files (if any) files[f]["pool name"] = utils.poolify (changes["source"], files[f]["component"]) - files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["location id"]) + files_id = DBConn().get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["location id"]) if files_id == -1: reject("INTERNAL ERROR, get_files_id() returned multiple matches for %s." % (f)) elif files_id == -2: @@ -639,16 +637,9 @@ def check_files(): files[f]["files id"] = files_id # Check for packages that have moved from one component to another - q = Upload.projectB.query(""" -SELECT c.name FROM binaries b, bin_associations ba, suite s, location l, - component c, architecture a, files f - WHERE b.package = '%s' AND s.suite_name = '%s' - AND (a.arch_string = '%s' OR a.arch_string = 'all') - AND ba.bin = b.id AND ba.suite = s.id AND b.architecture = a.id - AND f.location = l.id AND l.component = c.id AND b.file = f.id""" - % (files[f]["package"], suite, - files[f]["architecture"])) - ql = q.getresult() + files[f]['suite'] = suite + cursor.execute("""EXECUTE moved_pkg_q( %(package)s, %(suite)s, %(architecture)s )""", ( files[f] ) ) + ql = cursor.fetchone() if ql: files[f]["othercomponents"] = ql[0][0] @@ -870,13 +861,7 @@ def check_source(): or pkg.orig_tar_gz == -1: return - # Create a temporary directory to extract the source into - if Options["No-Action"]: - tmpdir = tempfile.mkdtemp() - else: - # We're in queue/holding and can create a random directory. - tmpdir = "%s" % (os.getpid()) - os.mkdir(tmpdir) + tmpdir = create_tmpdir() # Move into the temporary directory cwd = os.getcwd() @@ -997,12 +982,21 @@ def check_timestamps(): ################################################################################ def lookup_uid_from_fingerprint(fpr): - q = Upload.projectB.query("SELECT u.uid, u.name, k.debian_maintainer FROM fingerprint f JOIN keyrings k ON (f.keyring=k.id), uid u WHERE f.uid = u.id AND f.fingerprint = '%s'" % (fpr)) - qs = q.getresult() - if len(qs) == 0: - return (None, None, None) + """ + Return the uid,name,isdm for a given gpg fingerprint + + @ptype fpr: string + @param fpr: a 40 byte GPG fingerprint + + @return (uid, name, isdm) + """ + cursor = DBConn().cursor() + cursor.execute( "SELECT u.uid, u.name, k.debian_maintainer FROM fingerprint f JOIN keyrings k ON (f.keyring=k.id), uid u WHERE f.uid = u.id AND f.fingerprint = '%s'" % (fpr)) + qs = cursor.fetchone() + if qs: + return qs else: - return qs[0] + return (None, None, None) def check_signed_by_key(): """Ensure the .changes is signed by an authorized uploader.""" @@ -1043,12 +1037,16 @@ def check_signed_by_key(): if not sponsored and not may_nmu: source_ids = [] - q = Upload.projectB.query("SELECT s.id, s.version FROM source s JOIN src_associations sa ON (s.id = sa.source) WHERE s.source = '%s' AND s.dm_upload_allowed = 'yes'" % (changes["source"])) + cursor.execute( "SELECT s.id, s.version FROM source s JOIN src_associations sa ON (s.id = sa.source) WHERE s.source = %(source)s AND s.dm_upload_allowed = 'yes'", changes ) highest_sid, highest_version = None, None should_reject = True - for si in q.getresult(): + while True: + si = cursor.fetchone() + if not si: + break + if highest_version == None or apt_pkg.VersionCompare(si[1], highest_version) == 1: highest_sid = si[0] highest_version = si[1] @@ -1056,8 +1054,14 @@ def check_signed_by_key(): if highest_sid == None: reject("Source package %s does not have 'DM-Upload-Allowed: yes' in its most recent version" % changes["source"]) else: - q = Upload.projectB.query("SELECT m.name FROM maintainer m WHERE m.id IN (SELECT su.maintainer FROM src_uploaders su JOIN source s ON (s.id = su.source) WHERE su.source = %s)" % (highest_sid)) - for m in q.getresult(): + + cursor.execute("SELECT m.name FROM maintainer m WHERE m.id IN (SELECT su.maintainer FROM src_uploaders su JOIN source s ON (s.id = su.source) WHERE su.source = %s)" % (highest_sid)) + + while True: + m = cursor.fetchone() + if not m: + break + (rfc822, rfc2047, name, email) = utils.fix_maintainer(m[0]) if email == uid_email or name == uid_name: should_reject=False @@ -1068,9 +1072,14 @@ def check_signed_by_key(): for b in changes["binary"].keys(): for suite in changes["distribution"].keys(): - suite_id = database.get_suite_id(suite) - q = Upload.projectB.query("SELECT DISTINCT s.source FROM source s JOIN binaries b ON (s.id = b.source) JOIN bin_associations ba On (b.id = ba.bin) WHERE b.package = '%s' AND ba.suite = %s" % (b, suite_id)) - for s in q.getresult(): + suite_id = DBConn().get_suite_id(suite) + + cursor.execute("SELECT DISTINCT s.source FROM source s JOIN binaries b ON (s.id = b.source) JOIN bin_associations ba On (b.id = ba.bin) WHERE b.package = %(package)s AND ba.suite = %(suite)s" , {'package':b, 'suite':suite_id} ) + while True: + s = cursor.fetchone() + if not s: + break + if s[0] != changes["source"]: reject("%s may not hijack %s from source package %s in suite %s" % (uid, b, s, suite)) @@ -1214,11 +1223,9 @@ def move_to_dir (dest, perms=0660, changesperms=0664): ################################################################################ def is_unembargo (): - q = Upload.projectB.query( - "SELECT package FROM disembargo WHERE package = '%s' AND version = '%s'" % - (changes["source"], changes["version"])) - ql = q.getresult() - if ql: + cursor = DBConn().cursor() + cursor.execute( "SELECT package FROM disembargo WHERE package = %(source)s AND version = %(version)s", changes ) + if cursor.fetchone(): return 1 oldcwd = os.getcwd() @@ -1230,9 +1237,9 @@ def is_unembargo (): if changes["architecture"].has_key("source"): if Options["No-Action"]: return 1 - Upload.projectB.query( - "INSERT INTO disembargo (package, version) VALUES ('%s', '%s')" % - (changes["source"], changes["version"])) + cursor.execute( "INSERT INTO disembargo (package, version) VALUES ('%(package)s', '%(version)s')", + changes ) + cursor.execute( "COMMIT" ) return 1 return 0 @@ -1290,12 +1297,18 @@ def is_stableupdate (): return 0 if not changes["architecture"].has_key("source"): - pusuite = database.get_suite_id("proposed-updates") - q = Upload.projectB.query( - "SELECT S.source FROM source s JOIN src_associations sa ON (s.id = sa.source) WHERE s.source = '%s' AND s.version = '%s' AND sa.suite = %d" % - (changes["source"], changes["version"], pusuite)) - ql = q.getresult() - if ql: + pusuite = DBConn().get_suite_id("proposed-updates") + cursor = DBConn().cursor() + cursor.execute( """SELECT 1 FROM source s + JOIN src_associations sa ON (s.id = sa.source) + WHERE s.source = %(source)s + AND s.version = '%(version)s' + AND sa.suite = %(suite)d""", + {'source' : changes['source'], + 'version' : changes['version'], + 'suite' : pasuite}) + + if cursor.fetchone(): # source is already in proposed-updates so no need to hold return 0 @@ -1319,13 +1332,17 @@ def is_oldstableupdate (): return 0 if not changes["architecture"].has_key("source"): - pusuite = database.get_suite_id("oldstable-proposed-updates") - q = Upload.projectB.query( - "SELECT S.source FROM source s JOIN src_associations sa ON (s.id = sa.source) WHERE s.source = '%s' AND s.version = '%s' AND sa.suite = %d" % - (changes["source"], changes["version"], pusuite)) - ql = q.getresult() - if ql: - # source is already in oldstable-proposed-updates so no need to hold + pusuite = DBConn().get_suite_id("oldstable-proposed-updates") + cursor = DBConn().cursor() + cursor.execute( """"SELECT 1 FROM source s + JOIN src_associations sa ON (s.id = sa.source) + WHERE s.source = %(source)s + AND s.version = %(version)s + AND sa.suite = %d""", + {'source' : changes['source'], + 'version' : changes['version'], + 'suite' : pasuite}) + if cursor.fetchone(): return 0 return 1 diff --git a/daklib/binary.py b/daklib/binary.py new file mode 100755 index 00000000..70133db6 --- /dev/null +++ b/daklib/binary.py @@ -0,0 +1,151 @@ +#!/usr/bin/python + +""" +Functions related debian binary packages + +@contact: Debian FTPMaster +@copyright: 2009 Mike O'Connor +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +import os +import shutil +import tempfile +import tarfile +import commands +import traceback +from debian_bundle import deb822 +from dbconn import DBConn + +class Binary(object): + def __init__(self, filename): + self.filename = filename + self.tmpdir = None + self.chunks = None + + def __del__(self): + # we need to remove the temporary directory, if we created one + if self.tmpdir and os.path.exists(self.tmpdir): + shutil.rmtree(self.tmpdir) + + def __scan_ar(self): + # get a list of the ar contents + if not self.chunks: + + cmd = "ar t %s" % (self.filename) + + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + rejected = True + reject("%s: 'ar t' invocation failed." % (self.filename)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + self.chunks = output.split('\n') + + + + def __unpack(self): + # Internal function which extracts the contents of the .ar to + # a temporary directory + + if not self.tmpdir: + tmpdir = tempfile.mkdtemp() + cwd = os.getcwd() + try: + os.chdir( tmpdir ) + cmd = "ar x %s %s %s" % (os.path.join(cwd,self.filename), self.chunks[1], self.chunks[2]) + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + reject("%s: '%s' invocation failed." % (filename, cmd)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + else: + self.tmpdir = tmpdir + + finally: + os.chdir( cwd ) + + def valid_deb(self): + """ + Check deb contents making sure the .deb contains: + 1. debian-binary + 2. control.tar.gz + 3. data.tar.gz or data.tar.bz2 + in that order, and nothing else. + """ + self.__scan_ar() + rejected = not self.chunks + if len(self.chunks) != 3: + rejected = True + reject("%s: found %d chunks, expected 3." % (self.filename, len(self.chunks))) + if self.chunks[0] != "debian-binary": + rejected = True + reject("%s: first chunk is '%s', expected 'debian-binary'." % (self.filename, self.chunks[0])) + if self.chunks[1] != "control.tar.gz": + rejected = True + reject("%s: second chunk is '%s', expected 'control.tar.gz'." % (self.filename, self.chunks[1])) + if self.chunks[2] not in [ "data.tar.bz2", "data.tar.gz" ]: + rejected = True + reject("%s: third chunk is '%s', expected 'data.tar.gz' or 'data.tar.bz2'." % (self.filename, self.chunks[2])) + + return not rejected + + def scan_package(self): + """ + Unpack the .deb, do sanity checking, and gather info from it. + + Currently information gathering consists of getting the contents list. In + the hopefully near future, it should also include gathering info from the + control file. + + @return True if the deb is valid and contents were imported + """ + rejected = not self.valid_deb() + self.__unpack() + + if not rejected and self.tmpdir: + cwd = os.getcwd() + try: + os.chdir(self.tmpdir) + if self.chunks[1] == "control.tar.gz": + control = tarfile.open(os.path.join(self.tmpdir, "control.tar.gz" ), "r:gz") + elif self.chunks[1] == "control.tar.bz2": + control = tarfile.open(os.path.join(self.tmpdir, "control.tar.bz2" ), "r:bz2") + + pkg = deb822.Packages.iter_paragraphs( control.extractfile('./control') ).next() + + if self.chunks[2] == "data.tar.gz": + data = tarfile.open(os.path.join(self.tmpdir, "data.tar.gz"), "r:gz") + elif self.chunks[2] == "data.tar.bz2": + data = tarfile.open(os.path.join(self.tmpdir, "data.tar.bz2" ), "r:bz2") + + return DBConn().insert_content_paths(pkg, [ tarinfo.name for tarinfo in data if tarinfo.isdir()]) + + except: + traceback.print_exc() + + return False + + finally: + os.chdir( cwd ) + + + + +if __name__ == "__main__": + Binary( "/srv/ftp.debian.org/queue/accepted/halevt_0.1.3-2_amd64.deb" ).scan_package() + diff --git a/daklib/Config.py b/daklib/config.py old mode 100644 new mode 100755 similarity index 98% rename from daklib/Config.py rename to daklib/config.py index 96fdb8f5..997a597d --- a/daklib/Config.py +++ b/daklib/config.py @@ -31,7 +31,7 @@ Config access class import apt_pkg import socket -from Singleton import Singleton +from singleton import Singleton ################################################################################ diff --git a/daklib/database.py b/daklib/database.py index 3cbb67b7..5818733e 100755 --- a/daklib/database.py +++ b/daklib/database.py @@ -54,9 +54,6 @@ queue_id_cache = {} #: cache for queues uid_id_cache = {} #: cache for uids suite_version_cache = {} #: cache for suite_versions (packages) suite_bin_version_cache = {} -content_path_id_cache = {} -content_file_id_cache = {} -insert_contents_file_cache = {} cache_preloaded = False ################################################################################ @@ -781,58 +778,45 @@ def get_suites(pkgname, src=False): q = projectB.query(sql) return map(lambda x: x[0], q.getresult()) -################################################################################ - -def get_or_set_contents_file_id(file): - global content_file_id_cache - - if not content_file_id_cache.has_key(file): - sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file - q = projectB.query(sql_select) - if not q.getresult(): - # since this can be called within a transaction, we can't use currval - q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (file)) - content_file_id_cache[file] = int(q.getresult()[0][0]) - return content_file_id_cache[file] ################################################################################ -def get_or_set_contents_path_id(path): - global content_path_id_cache +def copy_temporary_contents(package, version, deb): + """ + copy the previously stored contents from the temp table to the permanant one - if not content_path_id_cache.has_key(path): - sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path - q = projectB.query(sql_select) - if not q.getresult(): - # since this can be called within a transaction, we can't use currval - q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (path)) - content_path_id_cache[path] = int(q.getresult()[0][0]) - return content_path_id_cache[path] + during process-unchecked, the deb should have been scanned and the + contents stored in temp_content_associations + """ -################################################################################ + # first see if contents exist: -def insert_content_path(bin_id, fullpath): - global insert_contents_file_cache - cache_key = "%s_%s" % (bin_id, fullpath) + exists = projectB.query("""SELECT 1 FROM temp_content_associations + WHERE package='%s' LIMIT 1""" % package ).getresult() - # have we seen this contents before? - # probably only revelant during package import - if insert_contents_file_cache.has_key(cache_key): - return + if not exists: + # This should NOT happen. We should have added contents + # during process-unchecked. if it did, log an error, and send + # an email. + subst = { + "__PACKAGE__": package, + "__VERSION__": version, + "__DAK_ADDRESS__": Cnf["Dinstall::MyEmailAddress"] + } - # split the path into basename, and pathname - (path, file) = os.path.split(fullpath) + message = utils.TemplateSubst(Subst, Cnf["Dir::Templates"]+"/bts-categorize") + utils.send_mail( message ) - # Get the necessary IDs ... - file_id = get_or_set_contents_file_id(file) - path_id = get_or_set_contents_path_id(path) + exists = DBConn().insert_content_path(package, version, deb) - # Determine if we're inserting a duplicate row - q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id)) - if q.getresult(): - # Yes we are, return without doing the insert - return + if exists: + sql = """INSERT INTO content_associations(binary_pkg,filepath,filename) + SELECT currval('binaries_id_seq'), filepath, filename FROM temp_content_associations + WHERE package='%s' + AND version='%s'""" % (package, version) + projectB.query(sql) + projectB.query("""DELETE from temp_content_associations + WHERE package='%s' + AND version='%s'""" % (package, version)) - # Put them into content_assiocations - projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id)) - return + return exists diff --git a/daklib/DBConn.py b/daklib/dbconn.py old mode 100644 new mode 100755 similarity index 77% rename from daklib/DBConn.py rename to daklib/dbconn.py index 75afb5a7..5bd1d765 --- a/daklib/DBConn.py +++ b/daklib/dbconn.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python """ DB access class @@ -35,9 +35,10 @@ import os import psycopg2 +import traceback -from Singleton import Singleton -from Config import Config +from singleton import Singleton +from config import Config ################################################################################ @@ -104,7 +105,7 @@ class DBConn(Singleton): 'maintainer': {}, # TODO 'keyring': {}, # TODO 'source': Cache(lambda x: '%s_%s_' % (x['source'], x['version'])), - 'files': {}, # TODO + 'files': Cache(lambda x: '%s_%s_' % (x['filename'], x['location'])), 'maintainer': {}, # TODO 'fingerprint': {}, # TODO 'queue': {}, # TODO @@ -277,8 +278,10 @@ class DBConn(Singleton): if component: component_id = self.get_component_id(component) if component_id: - res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND component=%(component)d AND archive=%(archive)d", - {'location': location, 'archive': archive_id, 'component': component_id}, cachename='location') + res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND component=%(component)s AND archive=%(archive)s", + {'location': location, + 'archive': int(archive_id), + 'component': component_id}, cachename='location') else: res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND archive=%(archive)d", {'location': location, 'archive': archive_id, 'component': ''}, cachename='location') @@ -332,6 +335,71 @@ class DBConn(Singleton): AND s.source=%(source)""", {'suite': suite, 'source': source}, cachename='suite_version') + def get_files_id (self, filename, size, md5sum, location_id): + """ + Returns -1, -2 or the file_id for filename, if its C{size} and C{md5sum} match an + existing copy. + + The database is queried using the C{filename} and C{location_id}. If a file does exist + at that location, the existing size and md5sum are checked against the provided + parameters. A size or checksum mismatch returns -2. If more than one entry is + found within the database, a -1 is returned, no result returns None, otherwise + the file id. + + Results are kept in a cache during runtime to minimize database queries. + + @type filename: string + @param filename: the filename of the file to check against the DB + + @type size: int + @param size: the size of the file to check against the DB + + @type md5sum: string + @param md5sum: the md5sum of the file to check against the DB + + @type location_id: int + @param location_id: the id of the location as returned by L{get_location_id} + + @rtype: int / None + @return: Various return values are possible: + - -2: size/checksum error + - -1: more than one file found in database + - None: no file found in database + - int: file id + + """ + values = {'filename' : filename, + 'location' : location_id} + + res = self.caches['files'].GetValue( values ) + + if not res: + query = """SELECT id, size, md5sum + FROM files + WHERE filename = %(filename)s AND location = %(location)s""" + + cursor = self.db_con.cursor() + cursor.execute( query, values ) + + if cursor.rowcount == 0: + res = None + + elif cursor.rowcount != 1: + res = -1 + + else: + row = cursor.fetchone() + + if row[1] != size or row[2] != md5sum: + res = -2 + + else: + self.caches[cachename].SetValue(values, row[0]) + res = row[0] + + return res + + def get_or_set_contents_file_id(self, filename): """ Returns database id for given filename. @@ -384,7 +452,7 @@ class DBConn(Singleton): return id - def insert_content_paths(self, bin_id, fullpaths): + def insert_content_paths(self, package, fullpaths): """ Make sure given path is associated with given binary id @@ -392,23 +460,34 @@ class DBConn(Singleton): @param bin_id: the id of the binary @type fullpath: string @param fullpath: the path of the file being associated with the binary + + @return True upon success """ c = self.db_con.cursor() - for fullpath in fullpaths: - c.execute( "BEGIN WORK" ) - (path, file) = os.path.split(fullpath) - - # Get the necessary IDs ... - file_id = self.get_or_set_contents_file_id(file) - path_id = self.get_or_set_contents_path_id(path) - - # Determine if we're inserting a duplicate row - - c.execute("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id)) - if not c.fetchone(): - # no, we are not, do the insert + c.execute("BEGIN WORK") + try: - c.execute("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id)) - c.execute( "COMMIT" ) + # Remove any already existing recorded files for this package + c.execute("""DELETE FROM temp_content_associations + WHERE package=%(Package)s + AND version=%(Version)s""", package ) + + for fullpath in fullpaths: + (path, file) = os.path.split(fullpath) + + # Get the necessary IDs ... + file_id = self.get_or_set_contents_file_id(file) + path_id = self.get_or_set_contents_path_id(path) + + c.execute("""INSERT INTO temp_content_associations + (package, version, filepath, filename) + VALUES (%%(Package)s, %%(Version)s, '%d', '%d')""" % (path_id, file_id), + package ) + c.execute("COMMIT") + return True + except: + traceback.print_exc() + c.execute("ROLLBACK") + return False diff --git a/daklib/Singleton.py b/daklib/singleton.py similarity index 100% rename from daklib/Singleton.py rename to daklib/singleton.py diff --git a/daklib/utils.py b/daklib/utils.py index 9582dadb..5cb502d6 100755 --- a/daklib/utils.py +++ b/daklib/utils.py @@ -1443,7 +1443,7 @@ def generate_contents_information(filename): Generate a list of flies contained in a .deb @type filename: string - @param filename: the path to a .deb + @param filename: the path to a data.tar.gz or data.tar.bz2 @rtype: list @return: a list of files in the data.tar.* portion of the .deb @@ -1481,6 +1481,7 @@ def generate_contents_information(filename): finally: if os.path.exists( chunks[2] ): + shutil.rmtree( chunks[2] ) os.remove( chunks[2] ) return contents diff --git a/templates/missing-contents b/templates/missing-contents new file mode 100644 index 00000000..a4477fa7 --- /dev/null +++ b/templates/missing-contents @@ -0,0 +1,14 @@ +From: __DAK_ADDRESS__ +X-Debian: DAK +X-Debian-Package: __PACKAGE__ +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit +Subject: Missing contents for __PACKAGE__ in accepted queue + +While processing the accepted queue, I didn't have contents in the +database for __PACKAGE__ version __VERSION__. These contents should +have been put into the database by process-unchecked when the package +first arrived. + +This is probably stew's fault. \ No newline at end of file diff --git a/templates/process-unchecked.new b/templates/process-unchecked.new index 6c3162fa..22c59759 100644 --- a/templates/process-unchecked.new +++ b/templates/process-unchecked.new @@ -2,7 +2,7 @@ From: __DAK_ADDRESS__ To: __MAINTAINER_TO__ __BCC__ X-Debian: DAK -X-Debian-Package: __SOURCE__ +X-Debian-Package: __PACKAGE__ Precedence: bulk MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8"