From: Philipp Kern Date: Thu, 28 Aug 2008 20:47:22 +0000 (+0200) Subject: implement sensible handling of checksum fields in .changes and .dsc X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=aaa9ef21b62dad7f83af44f0b457e9ccc8a938ec;p=dak implement sensible handling of checksum fields in .changes and .dsc 2008-08-28 Philipp Kern * daklib/utils.py (check_hashes): adapt to different API, check sizes separately * daklib/utils.py (parse_changes, parse_deb822): refactor the string-based logic of parse_changes into a new function parse_deb822; parse_changes itself remains file-based * daklib/utils.py (hash_key): gives the key of a hash in the files dict * daklib/utils.py (create_hash, check_size): made more readable * daklib/utils.py (check_hash): just check the hashes and complain about missing checksums * daklib/utils.py (check_hash_fields): function to reject unknown checksums fields * daklib/utils.py (_ensure_changes_hash, _ensure_dsc_hash): helper functions for ensure_hashes; check their corresponding manifests' hashes * daklib/utils.py (ensure_hashes): retrieve the checksums fields from the original filecontents blob so that they do not need to be present in the .dak; refactored the actual checks by calling the aforementioned helper functions * daklib/utils.py (parse_checksums): parse a given checksums field in a manifest and insert the values found into the files dict, checking the file sizes on the way --- diff --git a/ChangeLog b/ChangeLog index 4440c874..87a42732 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,36 @@ +2008-08-28 Philipp Kern + + * daklib/utils.py (check_hashes): adapt to different API, check + sizes separately + + * daklib/utils.py (parse_changes, parse_deb822): refactor + the string-based logic of parse_changes into a new function + parse_deb822; parse_changes itself remains file-based + + * daklib/utils.py (hash_key): gives the key of a hash in the + files dict + + * daklib/utils.py (create_hash, check_size): made more readable + + * daklib/utils.py (check_hash): just check the hashes and complain + about missing checksums + + * daklib/utils.py (check_hash_fields): function to reject unknown + checksums fields + + * daklib/utils.py (_ensure_changes_hash, _ensure_dsc_hash): helper + functions for ensure_hashes; check their corresponding manifests' + hashes + + * daklib/utils.py (ensure_hashes): retrieve the checksums fields + from the original filecontents blob so that they do not need to + be present in the .dak; refactored the actual checks by calling + the aforementioned helper functions + + * daklib/utils.py (parse_checksums): parse a given checksums field + in a manifest and insert the values found into the files dict, + checking the file sizes on the way + 2008-08-15 Mark Hymers * daklib/utils.py: Actually import a module before using it. diff --git a/dak/process_unchecked.py b/dak/process_unchecked.py index 123fd9f3..779f6765 100755 --- a/dak/process_unchecked.py +++ b/dak/process_unchecked.py @@ -919,8 +919,10 @@ def check_urgency (): ################################################################################ def check_hashes (): - utils.check_hash(".changes", files, "md5sum", apt_pkg.md5sum) - utils.check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum) + utils.check_hash(".changes", files, "md5", apt_pkg.md5sum) + utils.check_size(".changes", files) + utils.check_hash(".dsc", dsc_files, "md5", apt_pkg.md5sum) + utils.check_size(".dsc", dsc_files) # This is stupid API, but it'll have to do for now until # we actually have proper abstraction diff --git a/daklib/utils.py b/daklib/utils.py index 7ff93192..02278e91 100755 --- a/daklib/utils.py +++ b/daklib/utils.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# vim:set et ts=4 sw=4: # Utility functions # Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup @@ -100,34 +101,14 @@ def extract_component_from_section(section): ################################################################################ -def parse_changes(filename, signing_rules=0): - """Parses a changes file and returns a dictionary where each field is a -key. The mandatory first argument is the filename of the .changes -file. - -signing_rules is an optional argument: - - o If signing_rules == -1, no signature is required. - o If signing_rules == 0 (the default), a signature is required. - o If signing_rules == 1, it turns on the same strict format checking - as dpkg-source. - -The rules for (signing_rules == 1)-mode are: - - o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----" - followed by any PGP header data and must end with a blank line. - - o The data section must end with a blank line and must be followed by - "-----BEGIN PGP SIGNATURE-----". -""" - +def parse_deb822(contents, signing_rules=0): error = "" changes = {} - changes_in = open_file(filename) - lines = changes_in.readlines() + # Split the lines in the input, keeping the linebreaks. + lines = contents.splitlines(True) - if not lines: + if len(lines) == 0: raise ParseChangesError, "[Empty changes file]" # Reindex by line number so we can easily verify the format of @@ -193,7 +174,6 @@ The rules for (signing_rules == 1)-mode are: if signing_rules == 1 and inside_signature: raise InvalidDscError, index - changes_in.close() changes["filecontents"] = "".join(lines) if changes.has_key("source"): @@ -211,112 +191,208 @@ The rules for (signing_rules == 1)-mode are: ################################################################################ -def create_hash (lfiles, key, testfn, basedict = None): +def parse_changes(filename, signing_rules=0): + """Parses a changes file and returns a dictionary where each field is a +key. The mandatory first argument is the filename of the .changes +file. + +signing_rules is an optional argument: + + o If signing_rules == -1, no signature is required. + o If signing_rules == 0 (the default), a signature is required. + o If signing_rules == 1, it turns on the same strict format checking + as dpkg-source. + +The rules for (signing_rules == 1)-mode are: + + o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----" + followed by any PGP header data and must end with a blank line. + + o The data section must end with a blank line and must be followed by + "-----BEGIN PGP SIGNATURE-----". +""" + + changes_in = open_file(filename) + content = changes_in.read() + changes_in.close() + return parse_deb822(content, signing_rules) + +################################################################################ + +def hash_key(hashname): + return '%ssum' % hashname + +################################################################################ + +def create_hash(where, files, hashname, hashfunc): + """create_hash extends the passed files dict with the given hash by + iterating over all files on disk and passing them to the hashing + function given.""" + rejmsg = [] - for f in lfiles.keys(): + for f in files.keys(): try: file_handle = open_file(f) except CantOpenError: rejmsg.append("Could not open file %s for checksumming" % (f)) - # Check hash - if basedict and basedict.has_key(f): - basedict[f]['%ssum' % key] = testfn(file_handle) - file_handle.close() + files[f][hash_key(hashname)] = hashfunc(file_handle) + file_handle.close() return rejmsg ################################################################################ -def check_hash (where, lfiles, key, testfn, basedict = None): - rejmsg = [] - if basedict: - for f in basedict.keys(): - if f not in lfiles: - rejmsg.append("%s: no %s checksum" % (f, key)) - - for f in lfiles.keys(): - if basedict and f not in basedict: - rejmsg.append("%s: extraneous entry in %s checksums" % (f, key)) +def check_hash(where, files, hashname, hashfunc): + """check_hash checks the given hash in the files dict against the actual + files on disk. The hash values need to be present consistently in + all file entries. It does not modify its input in any way.""" + rejmsg = [] + for f in files.keys(): try: file_handle = open_file(f) + + # Check for the hash entry, to not trigger a KeyError. + if not files[f].has_key(hash_key(hashname)): + rejmsg.append("%s: misses %s checksum in %s" % (f, hashname, + where)) + continue + + # Actually check the hash for correctness. + if hashfunc(file_handle) != files[f][hash_key(hashname)]: + rejmsg.append("%s: %s check failed in %s" % (f, hashname, + where)) except CantOpenError: + # XXX: IS THIS THE BLOODY CASE WHEN THE FILE'S IN THE POOL!? continue + finally: + file_handle.close() + return rejmsg - # Check hash - if testfn(file_handle) != lfiles[f][key]: - rejmsg.append("%s: %s check failed." % (f, key)) - file_handle.close() - # Store the hashes for later use - if basedict: - basedict[f]['%ssum' % key] = lfiles[f][key] - # Check size +################################################################################ + +def check_size(where, files): + """check_size checks the file sizes in the passed files dict against the + files on disk.""" + + rejmsg = [] + for f in files.keys(): actual_size = os.stat(f)[stat.ST_SIZE] - size = int(lfiles[f]["size"]) + size = int(files[f]["size"]) if size != actual_size: rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s" % (f, actual_size, size, where)) + return rejmsg + +################################################################################ + +def check_hash_fields(what, manifest): + """check_hash_fields ensures that there are no checksum fields in the + given dict that we do not know about.""" + + rejmsg = [] + hashes = map(lambda x: x[0], known_hashes) + for field in manifest: + if field.startswith("checksums-"): + hashname = field.split("-",1)[1] + if hashname not in hashes: + rejmsg.append("Unsupported checksum field for %s "\ + "in %s" % (hashname, what)) + return rejmsg + +################################################################################ + +def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc): + if format >= version: + # The version should contain the specified hash. + func = check_hash + + # Import hashes from the changes + rejmsg = parse_checksums(".changes", files, changes, hashname) + if len(rejmsg) > 0: + return rejmsg + else: + # We need to calculate the hash because it can't possibly + # be in the file. + func = create_hash + return func(".changes", files, hashname, hashfunc) + +# We could add the orig which might be in the pool to the files dict to +# access the checksums easily. + +def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc): + """ensure_dsc_hashes' task is to ensure that each and every *present* hash + in the dsc is correct, i.e. identical to the changes file and if necessary + the pool. The latter task is delegated to check_hash.""" + rejmsg = [] + if not dsc.has_key('Checksums-%s' % (hashname,)): + return rejmsg + # Import hashes from the dsc + parse_checksums(".dsc", dsc_files, dsc, hashname) + # And check it... + rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc)) return rejmsg ################################################################################ def ensure_hashes(changes, dsc, files, dsc_files): - # Make sure we recognise the format of the Files: field - format = changes.get("format", "0.0").split(".",1) + rejmsg = [] + + # Make sure we recognise the format of the Files: field in the .changes + format = changes.get("format", "0.0").split(".", 1) if len(format) == 2: format = int(format[0]), int(format[1]) else: format = int(float(format[0])), 0 - rejmsg = [] - for x in changes: - if x.startswith("checksum-"): - h = x.split("-",1)[1] - if h not in dict(known_hashes): - rejmsg.append("Unsupported checksum field in .changes" % (h)) - - for x in dsc: - if x.startswith("checksum-"): - h = x.split("-",1)[1] - if h not in dict(known_hashes): - rejmsg.append("Unsupported checksum field in .dsc" % (h)) + # We need to deal with the original changes blob, as the fields we need + # might not be in the changes dict serialised into the .dak anymore. + orig_changes = parse_deb822(changes['filecontents']) + + # Copy the checksums over to the current changes dict. This will keep + # the existing modifications to it intact. + for field in orig_changes: + if field.startswith('checksums-'): + changes[field] = orig_changes[field] + + # Check for unsupported hashes + rejmsg.extend(check_hash_fields(".changes", changes)) + rejmsg.extend(check_hash_fields(".dsc", dsc)) # We have to calculate the hash if we have an earlier changes version than # the hash appears in rather than require it exist in the changes file - # I hate backwards compatibility - for h,f,v in known_hashes: - try: - if format < v: - for m in create_hash(files, h, f, files): - rejmsg.append(m) - else: - for m in check_hash(".changes %s" % (h), files, '%ssum' % h, f, files): - rejmsg.append(m) - except NoFilesFieldError: - rejmsg.append("No Checksums-%s: field in .changes" % (h)) - except UnknownFormatError, format: - rejmsg.append("%s: unknown format of .changes" % (format)) - except ParseChangesError, line: - rejmsg.append("parse error for Checksums-%s in .changes, can't grok: %s." % (h, line)) + for hashname, hashfunc, version in known_hashes: + rejmsg.extend(_ensure_changes_hash(changes, format, version, files, + hashname, hashfunc)) + if "source" in changes["architecture"]: + rejmsg.extend(_ensure_dsc_hash(dsc, dsc_files, hashname, + hashfunc)) - if "source" not in changes["architecture"]: continue - - try: - if format < v: - for m in create_hash(dsc_files, h, f, dsc_files): - rejmsg.append(m) - else: - for m in check_hash(".dsc %s" % (h), dsc_files, '%ssum' % h, f, dsc_files): - rejmsg.append(m) - except UnknownFormatError, format: - rejmsg.append("%s: unknown format of .dsc" % (format)) - except NoFilesFieldError: - rejmsg.append("No Checksums-%s: field in .dsc" % (h)) - except ParseChangesError, line: - rejmsg.append("parse error for Checksums-%s in .dsc, can't grok: %s." % (h, line)) + return rejmsg +def parse_checksums(where, files, manifest, hashname): + rejmsg = [] + field = 'checksums-%s' % hashname + if not field in manifest: + return rejmsg + input = manifest[field] + for line in input.split('\n'): + if not line: + break + hash, size, file = line.strip().split(' ') + if not files.has_key(file): + rejmsg.append("%s: not present in files but in checksums-%s in %s" % + (file, hashname, where)) + if not files[file]["size"] == size: + rejmsg.append("%s: size differs for files and checksums-%s entry "\ + "in %s" % (file, hashname, where)) + files[file][hash_key(hashname)] = hash + for f in files.keys(): + if not files[f].has_key(hash_key(hashname)): + rejmsg.append("%s: no entry in checksums-%s in %s" % (file, + hashname, where)) return rejmsg ################################################################################