From: Mike O'Connor Date: Fri, 13 Mar 2009 13:14:54 +0000 (-0400) Subject: fixing Binary.reject. adding better logging when we get an error in contents insertion X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5c4242ac752fcc98787ecf0d84927f907808e36e;p=dak fixing Binary.reject. adding better logging when we get an error in contents insertion Signed-off-by: Mike O'Connor --- diff --git a/dak/contents.py b/dak/contents.py index eaa1fb6b..62155748 100644 --- a/dak/contents.py +++ b/dak/contents.py @@ -178,6 +178,9 @@ class Contents(object): def __init__(self): self.header = None + def reject(self, message): + log.error("E: %s" % message) + def _getHeader(self): """ Internal method to return the header for Contents.gz files @@ -226,7 +229,7 @@ class Contents(object): return num_tabs = max(1, - int(math.ceil((self._goal_column - len(contents[0])) / 8))) + int(math.ceil((self._goal_column - len(contents[0])-1) / 8))) f.write(contents[0] + ( '\t' * num_tabs ) + contents[-1] + "\n") finally: @@ -279,7 +282,7 @@ class Contents(object): else: debfile = os.path.join( pooldir, deb[1] ) if os.path.exists( debfile ): - Binary(debfile).scan_package( deb[0] ) + Binary(debfile, self.reject).scan_package( deb[0] ) else: log.error( "missing .deb: %s" % deb[1] ) diff --git a/dak/process_unchecked.py b/dak/process_unchecked.py index 4b731a17..dd97b6c7 100755 --- a/dak/process_unchecked.py +++ b/dak/process_unchecked.py @@ -572,7 +572,7 @@ def check_files(): # Check the version and for file overwrites reject(Upload.check_binary_against_db(f),"") - Binary(f).scan_package() + Binary(f, reject).scan_package( ) # Checks for a source package... else: diff --git a/daklib/binary.py b/daklib/binary.py index a81f428f..8dd9b301 100755 --- a/daklib/binary.py +++ b/daklib/binary.py @@ -40,6 +40,7 @@ Functions related debian binary packages ################################################################################ import os +import sys import shutil import tempfile import tarfile @@ -48,12 +49,33 @@ import traceback import atexit from debian_bundle import deb822 from dbconn import DBConn +from config import Config +import logging +import utils class Binary(object): - def __init__(self, filename): + def __init__(self, filename, reject=None): + """ + @ptype filename: string + @param filename: path of a .deb + + @ptype reject: function + @param reject: a function to log reject messages to + """ self.filename = filename self.tmpdir = None self.chunks = None + self.wrapped_reject = reject + + def reject(self, message): + """ + if we were given a reject function, send the reject message, + otherwise send it to stderr. + """ + if self.wrapped_reject: + self.wrapped_reject(message) + else: + print >> sys.stderr, message def __del__(self): """ @@ -74,12 +96,11 @@ class Binary(object): if not self.chunks: cmd = "ar t %s" % (self.filename) - (result, output) = commands.getstatusoutput(cmd) if result != 0: rejected = True - reject("%s: 'ar t' invocation failed." % (self.filename)) - reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + self.reject("%s: 'ar t' invocation failed." % (self.filename)) + self.reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") self.chunks = output.split('\n') @@ -96,8 +117,8 @@ class Binary(object): cmd = "ar x %s %s %s" % (os.path.join(cwd,self.filename), self.chunks[1], self.chunks[2]) (result, output) = commands.getstatusoutput(cmd) if result != 0: - reject("%s: '%s' invocation failed." % (filename, cmd)) - reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + self.reject("%s: '%s' invocation failed." % (self.filename, cmd)) + self.reject(utils.prefix_multi_line_string(output, " [ar output:] ")) else: self.tmpdir = tmpdir atexit.register( self._cleanup ) @@ -117,16 +138,16 @@ class Binary(object): rejected = not self.chunks if len(self.chunks) != 3: rejected = True - reject("%s: found %d chunks, expected 3." % (self.filename, len(self.chunks))) + self.reject("%s: found %d chunks, expected 3." % (self.filename, len(self.chunks))) if self.chunks[0] != "debian-binary": rejected = True - reject("%s: first chunk is '%s', expected 'debian-binary'." % (self.filename, self.chunks[0])) + self.reject("%s: first chunk is '%s', expected 'debian-binary'." % (self.filename, self.chunks[0])) if self.chunks[1] != "control.tar.gz": rejected = True - reject("%s: second chunk is '%s', expected 'control.tar.gz'." % (self.filename, self.chunks[1])) + self.reject("%s: second chunk is '%s', expected 'control.tar.gz'." % (self.filename, self.chunks[1])) if self.chunks[2] not in [ "data.tar.bz2", "data.tar.gz" ]: rejected = True - reject("%s: third chunk is '%s', expected 'data.tar.gz' or 'data.tar.bz2'." % (self.filename, self.chunks[2])) + self.reject("%s: third chunk is '%s', expected 'data.tar.gz' or 'data.tar.bz2'." % (self.filename, self.chunks[2])) return not rejected @@ -150,8 +171,8 @@ class Binary(object): result = False + cwd = os.getcwd() if not rejected and self.tmpdir: - cwd = os.getcwd() try: os.chdir(self.tmpdir) if self.chunks[1] == "control.tar.gz": @@ -175,6 +196,48 @@ class Binary(object): os.chdir(cwd) return result + def check_utf8_package(self, package): + """ + Unpack the .deb, do sanity checking, and gather info from it. + + Currently information gathering consists of getting the contents list. In + the hopefully near future, it should also include gathering info from the + control file. + + @ptype bootstrap_id: int + @param bootstrap_id: the id of the binary these packages + should be associated or zero meaning we are not bootstrapping + so insert into a temporary table + + @return True if the deb is valid and contents were imported + """ + rejected = not self.valid_deb() + self.__unpack() + + if not rejected and self.tmpdir: + cwd = os.getcwd() + try: + os.chdir(self.tmpdir) + if self.chunks[1] == "control.tar.gz": + control = tarfile.open(os.path.join(self.tmpdir, "control.tar.gz" ), "r:gz") + control.extract('control', self.tmpdir ) + if self.chunks[2] == "data.tar.gz": + data = tarfile.open(os.path.join(self.tmpdir, "data.tar.gz"), "r:gz") + elif self.chunks[2] == "data.tar.bz2": + data = tarfile.open(os.path.join(self.tmpdir, "data.tar.bz2" ), "r:bz2") + + for tarinfo in data: + try: + unicode( tarinfo.name ) + except: + print >> sys.stderr, "E: %s has non-unicode filename: %s" % (package,tarinfo.name) + + except: + traceback.print_exc() + result = False + + os.chdir(cwd) + if __name__ == "__main__": Binary( "/srv/ftp.debian.org/queue/accepted/halevt_0.1.3-2_amd64.deb" ).scan_package() diff --git a/daklib/dbconn.py b/daklib/dbconn.py index 3fad3f50..308f5548 100755 --- a/daklib/dbconn.py +++ b/daklib/dbconn.py @@ -413,18 +413,22 @@ class DBConn(Singleton): @rtype: int @return: the database id for the given component """ - values={'value': filename} - query = "SELECT id FROM content_file_names WHERE file = %(value)s" - id = self.__get_single_id(query, values, cachename='content_file_names') - if not id: - c = self.db_con.cursor() - c.execute( "INSERT INTO content_file_names VALUES (DEFAULT, %(value)s) RETURNING id", - values ) - - id = c.fetchone()[0] - self.caches['content_file_names'].SetValue(values, id) - - return id + try: + values={'value': filename} + query = "SELECT id FROM content_file_names WHERE file = %(value)s" + id = self.__get_single_id(query, values, cachename='content_file_names') + if not id: + c = self.db_con.cursor() + c.execute( "INSERT INTO content_file_names VALUES (DEFAULT, %(value)s) RETURNING id", + values ) + + id = c.fetchone()[0] + self.caches['content_file_names'].SetValue(values, id) + + return id + except: + traceback.print_exc() + raise def get_or_set_contents_path_id(self, path): """ @@ -439,18 +443,22 @@ class DBConn(Singleton): @rtype: int @return: the database id for the given component """ - values={'value': path} - query = "SELECT id FROM content_file_paths WHERE path = %(value)s" - id = self.__get_single_id(query, values, cachename='content_path_names') - if not id: - c = self.db_con.cursor() - c.execute( "INSERT INTO content_file_paths VALUES (DEFAULT, %(value)s) RETURNING id", - values ) - - id = c.fetchone()[0] - self.caches['content_path_names'].SetValue(values, id) - - return id + try: + values={'value': path} + query = "SELECT id FROM content_file_paths WHERE path = %(value)s" + id = self.__get_single_id(query, values, cachename='content_path_names') + if not id: + c = self.db_con.cursor() + c.execute( "INSERT INTO content_file_paths VALUES (DEFAULT, %(value)s) RETURNING id", + values ) + + id = c.fetchone()[0] + self.caches['content_path_names'].SetValue(values, id) + + return id + except: + traceback.print_exc() + raise def get_suite_architectures(self, suite): """