From f265a8e0edb2e2cb097d237a62a58dab0598583e Mon Sep 17 00:00:00 2001 From: Mark Hymers Date: Wed, 23 Mar 2011 09:53:09 +0000 Subject: [PATCH] Start work on Packages import Signed-off-by: Mark Hymers --- dak/packagescan.py | 115 +++++++++++++++++++++++++++++++++++++++++++++ daklib/dbconn.py | 33 +++++++++++++ daklib/packages.py | 94 ++++++++++++++++++++++++++++++++++++ docs/README.quotes | 7 --- 4 files changed, 242 insertions(+), 7 deletions(-) create mode 100755 dak/packagescan.py create mode 100755 daklib/packages.py diff --git a/dak/packagescan.py b/dak/packagescan.py new file mode 100755 index 00000000..2d2bab00 --- /dev/null +++ b/dak/packagescan.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +""" +Import data for Packages files from .deb files + +@contact: Debian FTPMaster +@copyright: 2008, 2009 Michael Casadevall +@copyright: 2009 Mike O'Connor +@copyright: 2011 Torsten Werner +@copyright: 2011 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +################################################################################ + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +# < mvo> that screams for consolidation in libapt at least (that then in turn can +# use libdpkg ... 
) - I guess the "d" means delayed ;)
+
+# (whilst discussing adding xz support to dak, and therefore python-apt, and
+# therefore libapt-pkg)
+
+################################################################################
+
+import sys
+import apt_pkg
+
+from daklib.config import Config
+from daklib.dbconn import *
+from daklib.packages import PackagesScanner
+from daklib import daklog
+from daklib import utils
+
+################################################################################
+
+def usage (exit_code=0):
+    print """Usage: dak packagescan [options] subcommand
+
+SUBCOMMANDS
+    scan
+        scan the debs in the existing pool and load metadata into the database
+
+OPTIONS
+     -h, --help
+        show this help and exit
+
+OPTIONS for scan
+     -l, --limit=NUMBER
+        maximum number of packages to scan
+"""
+    sys.exit(exit_code)
+
+################################################################################
+
+def scan_all(cnf, limit):
+    Logger = daklog.Logger(cnf.Cnf, 'packages scan')
+    result = PackagesScanner.scan_all(limit)  # dict: 'processed', 'remaining'
+    processed = '%(processed)d packages processed' % result
+    remaining = '%(remaining)d packages remaining' % result
+    Logger.log([processed, remaining])
+    Logger.close()
+
+################################################################################
+
+def main():
+    cnf = Config()
+    cnf['Packages::Options::Help'] = ''
+    cnf['Packages::Options::Suite'] = ''
+    cnf['Packages::Options::Limit'] = ''
+    cnf['Packages::Options::Force'] = ''
+    arguments = [('h', "help", 'Packages::Options::Help'),
+                 ('s', "suite", 'Packages::Options::Suite', "HasArg"),
+                 ('l', "limit", 'Packages::Options::Limit', "HasArg"),
+                 ('f', "force", 'Packages::Options::Force'),
+                ]
+    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv)
+    options = cnf.SubTree('Packages::Options')
+
+    if (len(args) != 1) or options['Help']:
+        usage()
+
+    limit = None  # None means "no limit" for PackagesScanner.scan_all()
+    if len(options['Limit']) > 0:
+        limit = int(options['Limit'])
+
+    if args[0] == 'scan':
+        scan_all(cnf, limit)
+        return
+
+    suite_names = utils.split_args(options['Suite'])  # not used yet
+
+    force = bool(options['Force'])  # not used yet
+
+    if args[0] == 'generate':
+        raise NotImplementedError  # was the undefined name NotImplementError
+
+    usage()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/daklib/dbconn.py b/daklib/dbconn.py
index 038c014b..79e0621b 100755
--- a/daklib/dbconn.py
+++ b/daklib/dbconn.py
@@ -525,6 +525,25 @@ class DBBinary(ORMObject):
         dpkg.stdout.close()
         dpkg.wait()
 
+    def read_control(self):
+        '''
+        Reads the control information from a binary.
+
+        @rtype: tuple
+        @return: (stanza, controldict)  stanza is the text of the control
+                 section.  controldict is the information in a dictionary
+                 form
+        '''
+        import apt_inst, apt_pkg
+        fullpath = self.poolfile.fullpath
+        with open(fullpath, 'r') as deb_file:  # closed even if extraction fails
+            stanza = apt_inst.debExtractControl(deb_file).rstrip()
+        # turn the parsed stanza into a plain field -> value dictionary
+        control = dict(apt_pkg.TagSection(stanza))
+
+        return stanza, control
+
+
 __all__.append('DBBinary')
 
 @session_wrapper
@@ -2172,6 +2191,20 @@ class DBSource(ORMObject):
         return ['source', 'version', 'install_date', 'maintainer', \
             'changedby', 'poolfile', 'install_date']
 
+    def read_control(self):
+        '''
+        Reads the control information from a dsc
+
+        @rtype: Deb822
+        @return: the fields of the dsc, parsed into a dictionary-like
+                 Deb822 object (unlike DBBinary.read_control, the raw
+                 stanza text is not returned)
+        '''
+        from debian.deb822 import Deb822
+        fullpath = self.poolfile.fullpath
+        with open(fullpath, 'r') as dsc_file:  # do not leak the file handle
+            return Deb822(dsc_file)
+
 __all__.append('DBSource')
 
 @session_wrapper
diff --git a/daklib/packages.py b/daklib/packages.py
new file mode 100755
index 00000000..27b6d287
--- /dev/null
+++ b/daklib/packages.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+"""
+Helper code for packages generation.
+
+@contact: Debian FTPMaster
+@copyright: 2011 Torsten Werner
+@copyright: 2011 Mark Hymers
+@license: GNU General Public License version 2 or later
+"""
+
+################################################################################
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+################################################################################
+
+from daklib.dbconn import *
+from daklib.config import Config
+
+from multiprocessing import Pool
+from subprocess import Popen, PIPE
+
+import os.path
+
+class PackagesScanner(object):
+    '''
+    PackagesScanner provides a threadsafe method scan() to scan the metadata of
+    a DBBinary object.
+    '''
+    def __init__(self, binary_id):
+        '''
+        The argument binary_id is the id of the DBBinary object that
+        should be scanned.
+        '''
+        self.binary_id = binary_id
+
+    def scan(self, dummy_arg = None):
+        '''
+        This method reads the control data of the binary and, for now, only
+        prints the stanza; nothing is stored in or committed to the database
+        yet. The argument dummy_arg is ignored.
+        '''
+        session = DBConn().session()
+        binary = session.query(DBBinary).get(self.binary_id)
+        stanza, control = binary.read_control()  # a (str, dict) tuple: no set()
+        print stanza
+        #if len(fileset) == 0:
+        #    fileset.add('EMPTY_PACKAGE')
+        #for filename in fileset:
+        #    binary.contents.append(BinContents(file = filename))
+        #session.commit()
+        session.close()
+
+    @classmethod
+    def scan_all(class_, limit = None):
+        '''
+        The class method scan_all() scans all binaries that have no contents
+        yet, using a pool of worker processes. The number of binaries to be
+        scanned can be limited with the limit argument. Returns a dict with
+        the number of 'processed' and 'remaining' packages.
+        '''
+        session = DBConn().session()
+        query = session.query(DBBinary).filter(DBBinary.contents == None)
+        remaining = query.count  # bound method of the unlimited query
+        if limit is not None:
+            query = query.limit(limit)
+        processed = query.count()
+        pool = Pool()
+        for binary in query.yield_per(100):
+            pool.apply_async(scan_helper, (binary.binary_id, ))
+        pool.close()
+        pool.join()
+        remaining = remaining()  # counted only after the workers finished
+        session.close()
+        return { 'processed': processed, 'remaining': remaining }
+
+def scan_helper(binary_id):
+    '''
+    This function runs in a subprocess.
+    '''
+    scanner = PackagesScanner(binary_id)
+    scanner.scan()
diff --git a/docs/README.quotes b/docs/README.quotes
index 6b89fc0f..2b159a8f 100644
--- a/docs/README.quotes
+++ b/docs/README.quotes
@@ -347,10 +347,3 @@ Canadians: This is a lighthouse. Your call.
 
  I wish they wouldnt leave biscuits out, thats just tempting. Damnit.
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-< mvo> that screams for consolidation in libapt at least (that then in turn can use libdpkg … ) - I guess the "d" means delayed ;)
-
-(whilst discussing adding xz support to dak, and therefore python-apt, and
-therefore libapt-pkg)
-
-- 
2.39.5