From: Ansgar Burchardt
Date: Sun, 10 Jun 2012 14:22:27 +0000 (+0200)
Subject: dak/clean_suites.py: update for multi-archive changes
X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bf26571a81571c739079c17037d626e8fc967738;p=dak

dak/clean_suites.py: update for multi-archive changes
---

diff --git a/dak/clean_suites.py b/dak/clean_suites.py
index 66bda62d..c5b06261 100755
--- a/dak/clean_suites.py
+++ b/dak/clean_suites.py
@@ -69,47 +69,40 @@ def check_binaries(now_date, delete_date, max_delete, session):
     # Get the list of binary packages not in a suite and mark them for
     # deletion.
-
-    q = session.execute("""
-SELECT b.file, f.filename
-         FROM binaries b
-    LEFT JOIN files f
-      ON (b.file = f.id)
-   WHERE f.last_used IS NULL
-     AND b.id NOT IN
-         (SELECT ba.bin FROM bin_associations ba)
-     AND f.id NOT IN
-         (SELECT bqf.fileid FROM build_queue_files bqf)""")
-    for i in q.fetchall():
-        Logger.log(["set lastused", i[1]])
-        if not Options["No-Action"]:
-            session.execute("UPDATE files SET last_used = :lastused WHERE id = :fileid AND last_used IS NULL",
-                            {'lastused': now_date, 'fileid': i[0]})
-
-    if not Options["No-Action"]:
-        session.commit()
-
     # Check for any binaries which are marked for eventual deletion
     # but are now used again.
-    q = session.execute("""
-SELECT b.file, f.filename
-         FROM binaries b
-    LEFT JOIN files f
-      ON (b.file = f.id)
-   WHERE f.last_used IS NOT NULL
-     AND (b.id IN
-         (SELECT ba.bin FROM bin_associations ba)
-      OR f.id IN
-         (SELECT bqf.fileid FROM build_queue_files bqf))""")
-
-    for i in q.fetchall():
-        Logger.log(["unset lastused", i[1]])
-        if not Options["No-Action"]:
-            session.execute("UPDATE files SET last_used = NULL WHERE id = :fileid", {'fileid': i[0]})
-
-    if not Options["No-Action"]:
-        session.commit()
+    query = """
+       WITH usage AS (
+         SELECT
+           af.archive_id AS archive_id,
+           af.file_id AS file_id,
+           af.component_id AS component_id,
+           BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
+                            JOIN suite s ON ba.suite = s.id
+                           WHERE ba.bin = b.id
+                             AND s.archive_id = af.archive_id))
+             AS in_use
+           FROM files_archive_map af
+           JOIN binaries b ON af.file_id = b.file
+          GROUP BY af.archive_id, af.file_id, af.component_id
+       )
+
+       UPDATE files_archive_map af
+          SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
+         FROM usage, files f, archive
+        WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
+          AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
+          AND af.file_id = f.id
+          AND af.archive_id = archive.id
+       RETURNING archive.name, f.filename, af.last_used IS NULL"""
+
+    res = session.execute(query, {'last_used': now_date})
+    for i in res:
+        op = "set lastused"
+        if i[2]:
+            op = "unset lastused"
+        Logger.log([op, i[0], i[1]])

 ########################################
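The hunk above folds the two SELECT-then-UPDATE loops of check_binaries into one statement: the "usage" CTE decides, for every (archive_id, file_id, component_id) row of files_archive_map, whether some binary built from that file is still associated with a suite of the same archive, and the UPDATE then sets or clears last_used in a single pass, with RETURNING feeding the log. A minimal pure-Python model of that decision, assuming illustrative data shapes and a made-up helper name (this is not dak code):

    from datetime import datetime

    def toggle_last_used(archive_files, in_use_map, now=None):
        """archive_files maps (archive_id, file_id, component_id) to a
        last_used timestamp or None; in_use_map maps the same keys to True
        when some binary using the file is still in a suite of that archive.
        Returns entries analogous to the RETURNING rows that get logged."""
        now = now or datetime.utcnow()
        log = []
        for key, last_used in archive_files.items():
            in_use = in_use_map.get(key, False)
            if last_used is None and not in_use:
                archive_files[key] = now    # newly unreferenced: start the clock
                log.append(("set lastused", key))
            elif last_used is not None and in_use:
                archive_files[key] = None   # referenced again: clear the mark
                log.append(("unset lastused", key))
        return log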
@@ -118,75 +111,62 @@ def check_sources(now_date, delete_date, max_delete, session):
     # Get the list of source packages not in a suite and not used by
     # any binaries.
-    q = session.execute("""
-SELECT s.id, s.file, f.filename
-         FROM source s
-    LEFT JOIN files f
-      ON (s.file = f.id)
-   WHERE f.last_used IS NULL
-     AND s.id NOT IN
-         (SELECT sa.source FROM src_associations sa)
-     AND s.id NOT IN
-         (SELECT b.source FROM binaries b)
-     AND s.id NOT IN (SELECT esr.src_id FROM extra_src_references esr)
-     AND f.id NOT IN
-         (SELECT bqf.fileid FROM build_queue_files bqf)""")
     #### XXX: this should ignore cases where the files for the binary b
     #### have been marked for deletion (so the delay between bins go
     #### byebye and sources go byebye is 0 instead of StayOfExecution)
-    for i in q.fetchall():
-        source_id = i[0]
-        dsc_file_id = i[1]
-        dsc_fname = i[2]
-
-        # Mark the .dsc file for deletion
-        Logger.log(["set lastused", dsc_fname])
-        if not Options["No-Action"]:
-            session.execute("""UPDATE files SET last_used = :last_used
-                                WHERE id = :dscfileid AND last_used IS NULL""",
-                            {'last_used': now_date, 'dscfileid': dsc_file_id})
-
-        # Mark all other files references by .dsc too if they're not used by anyone else
-        x = session.execute("""SELECT f.id, f.filename FROM files f, dsc_files d
-                                WHERE d.source = :sourceid AND d.file = f.id""",
-                            {'sourceid': source_id})
-        for j in x.fetchall():
-            file_id = j[0]
-            file_name = j[1]
-            y = session.execute("SELECT id FROM dsc_files d WHERE d.file = :fileid", {'fileid': file_id})
-            if len(y.fetchall()) == 1:
-                Logger.log(["set lastused", file_name])
-                if not Options["No-Action"]:
-                    session.execute("""UPDATE files SET last_used = :lastused
-                                        WHERE id = :fileid AND last_used IS NULL""",
-                                    {'lastused': now_date, 'fileid': file_id})
-
-    if not Options["No-Action"]:
-        session.commit()
-
     # Check for any sources which are marked for deletion but which
     # are now used again.
-    q = session.execute("""
-SELECT f.id, f.filename FROM source s, files f, dsc_files df
-  WHERE f.last_used IS NOT NULL AND s.id = df.source AND df.file = f.id
-    AND ((EXISTS (SELECT 1 FROM src_associations sa WHERE sa.source = s.id))
-      OR (EXISTS (SELECT 1 FROM extra_src_references esr WHERE esr.src_id = s.id))
-      OR (EXISTS (SELECT 1 FROM binaries b WHERE b.source = s.id))
-      OR (EXISTS (SELECT 1 FROM build_queue_files bqf WHERE bqf.fileid = s.file)))""")
     #### XXX: this should also handle deleted binaries specially (ie, not
     #### reinstate sources because of them
-    for i in q.fetchall():
-        Logger.log(["unset lastused", i[1]])
-        if not Options["No-Action"]:
-            session.execute("UPDATE files SET last_used = NULL WHERE id = :fileid",
-                            {'fileid': i[0]})
+    # TODO: the UPDATE part is the same as in check_binaries. Merge?
+
+    query = """
+       WITH usage AS (
+         SELECT
+           af.archive_id AS archive_id,
+           af.file_id AS file_id,
+           af.component_id AS component_id,
+           BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
+                            JOIN suite s ON sa.suite = s.id
+                           WHERE sa.source = df.source
+                             AND s.archive_id = af.archive_id)
+               OR EXISTS (SELECT 1 FROM files_archive_map af_bin
+                            JOIN binaries b ON af_bin.file_id = b.file
+                           WHERE b.source = df.source
+                             AND af_bin.archive_id = af.archive_id)
+               OR EXISTS (SELECT 1 FROM extra_src_references esr
+                            JOIN bin_associations ba ON esr.bin_id = ba.bin
+                            JOIN binaries b ON ba.bin = b.id
+                            JOIN suite s ON ba.suite = s.id
+                           WHERE esr.src_id = df.source
+                             AND s.archive_id = af.archive_id))
+             AS in_use
+           FROM files_archive_map af
+           JOIN dsc_files df ON af.file_id = df.file
+          GROUP BY af.archive_id, af.file_id, af.component_id
+       )
-    if not Options["No-Action"]:
-        session.commit()
+       UPDATE files_archive_map af
+          SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
+         FROM usage, files f, archive
+        WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
+          AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
+          AND af.file_id = f.id
+          AND af.archive_id = archive.id
+
+       RETURNING archive.name, f.filename, af.last_used IS NULL
+       """
+
+    res = session.execute(query, {'last_used': now_date})
+    for i in res:
+        op = "set lastused"
+        if i[2]:
+            op = "unset lastused"
+        Logger.log([op, i[0], i[1]])

 ########################################
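The TODO in this hunk notes that the UPDATE ... RETURNING part is identical to the one in check_binaries. One way the duplication could be folded, sketched with an invented helper name and signature (session, Logger and the bind-parameter style are the ones already used in clean_suites.py; this refactoring is not part of the patch):

    def _mark_unused_files(session, usage_select, params, now_date):
        # Hypothetical helper: run the shared UPDATE against any
        # caller-supplied "usage" SELECT and log the rows it touched.
        query = """
           WITH usage AS ({0})
           UPDATE files_archive_map af
              SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
             FROM usage, files f, archive
            WHERE af.archive_id = usage.archive_id
              AND af.file_id = usage.file_id
              AND af.component_id = usage.component_id
              AND ((af.last_used IS NULL AND NOT usage.in_use)
                   OR (af.last_used IS NOT NULL AND usage.in_use))
              AND af.file_id = f.id
              AND af.archive_id = archive.id
           RETURNING archive.name, f.filename, af.last_used IS NULL""".format(usage_select)
        for archive_name, filename, unset in session.execute(query, dict(params, last_used=now_date)):
            Logger.log(["unset lastused" if unset else "set lastused", archive_name, filename])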
@@ -200,26 +180,22 @@ def check_files(now_date, delete_date, max_delete, session):
     print "Checking for unused files..."
     q = session.execute("""
-SELECT id, filename FROM files f
-  WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = f.id)
-    AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = f.id)
-    AND NOT EXISTS (SELECT 1 FROM changes_pool_files cpf WHERE cpf.fileid = f.id)
-    AND NOT EXISTS (SELECT 1 FROM build_queue_files qf WHERE qf.fileid = f.id)
-    AND last_used IS NULL
-    ORDER BY filename""")
-
-    ql = q.fetchall()
-    if len(ql) > 0:
-        utils.warn("check_files found something it shouldn't")
-        for x in ql:
-            utils.warn("orphaned file: %s" % x)
-            Logger.log(["set lastused", x[1], "ORPHANED FILE"])
-            if not Options["No-Action"]:
-                session.execute("UPDATE files SET last_used = :lastused WHERE id = :fileid",
-                                {'lastused': now_date, 'fileid': x[0]})
+    UPDATE files_archive_map af
+       SET last_used = :last_used
+      FROM files f, archive
+     WHERE af.file_id = f.id
+       AND af.archive_id = archive.id
+       AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
+       AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
+       AND af.last_used IS NULL
+    RETURNING archive.name, f.filename""", {'last_used': now_date})
+
+    for x in q:
+        utils.warn("orphaned file: {0}".format(x))
+        Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"])
-        if not Options["No-Action"]:
-            session.commit()
+    if not Options["No-Action"]:
+        session.commit()

 def clean_binaries(now_date, delete_date, max_delete, session):
     # We do this here so that the binaries we remove will have their
@@ -227,14 +203,19 @@ def clean_binaries(now_date, delete_date, max_delete, session):
     # XXX: why doesn't this remove the files here as well? I don't think it
     #      buys anything keeping this separate
-    print "Cleaning binaries from the DB..."
+    print "Deleting from binaries table... "
-    for bin in session.query(DBBinary).join(DBBinary.poolfile).filter(PoolFile.last_used <= delete_date):
-        Logger.log(["delete binary", bin.poolfile.filename])
-        if not Options["No-Action"]:
-            session.delete(bin)
-
-    if not Options["No-Action"]:
-        session.commit()
+    q = session.execute("""
+      DELETE FROM binaries b
+       USING files f
+       WHERE f.id = b.file
+         AND NOT EXISTS (SELECT 1 FROM files_archive_map af
+                          WHERE af.file_id = b.file
+                            AND (af.last_used IS NULL OR af.last_used >= :delete_date))
+      RETURNING f.filename
+    """, {'delete_date': delete_date})
+    for b in q:
+        Logger.log(["delete binary", b[0]])

 ########################################
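With the multi-archive schema, clean_binaries only drops a binary row once its pool file has expired in every archive it is mapped into: the NOT EXISTS guard keeps the row as long as any files_archive_map entry still has last_used unset or dated at or after delete_date. The same condition restated as a small, self-contained predicate (name and inputs are illustrative only):

    def expired_in_all_archives(last_used_values, delete_date):
        # last_used_values: the files_archive_map.last_used entries for the
        # binary's pool file, one per archive/component mapping.  The DELETE
        # above fires only when no entry is still unset (NULL) or dated at or
        # after delete_date, i.e. when every mapping has already expired.
        return all(lu is not None and lu < delete_date for lu in last_used_values)

    # e.g. expired in the main archive but still unset in a build-queue archive:
    #   expired_in_all_archives([date(2012, 1, 1), None], date(2012, 6, 10)) -> False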
" - for bin in session.query(DBBinary).join(DBBinary.poolfile).filter(PoolFile.last_used <= delete_date): - Logger.log(["delete binary", bin.poolfile.filename]) - if not Options["No-Action"]: - session.delete(bin) - if not Options["No-Action"]: - session.commit() + q = session.execute(""" + DELETE FROM binaries b + USING files f + WHERE f.id = b.file + AND NOT EXISTS (SELECT 1 FROM files_archive_map af + WHERE af.file_id = b.file + AND (af.last_used IS NULL OR af.last_used >= :delete_date)) + RETURNING f.filename + """, {'delete_date': delete_date}) + for b in q: + Logger.log(["delete binary", b[0]]) ######################################## @@ -262,31 +243,47 @@ def clean(now_date, delete_date, max_delete, session): # Delete from source print "Deleting from source table... " q = session.execute(""" -SELECT s.id, f.filename FROM source s, files f - WHERE f.last_used <= :deletedate - AND s.file = f.id - AND s.id NOT IN (SELECT src_id FROM extra_src_references)""", {'deletedate': delete_date}) - for s in q.fetchall(): - Logger.log(["delete source", s[1], s[0]]) - if not Options["No-Action"]: - session.execute("DELETE FROM dsc_files WHERE source = :s_id", {"s_id":s[0]}) - session.execute("DELETE FROM source WHERE id = :s_id", {"s_id":s[0]}) + WITH + deleted_sources AS ( + DELETE FROM source + USING files f + WHERE source.file = f.id + AND NOT EXISTS (SELECT 1 FROM files_archive_map af + WHERE af.file_id = source.file + AND (af.last_used IS NULL OR af.last_used >= :delete_date)) + RETURNING source.id AS id, f.filename AS filename + ), + deleted_dsc_files AS ( + DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources) + RETURNING df.file AS file_id + ), + now_unused_source_files AS ( + UPDATE files_archive_map af + SET last_used = :delete_date -- Kill it now. We waited long enough before removing the .dsc. + WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files) + AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) + ) + SELECT filename FROM deleted_sources""", + {'delete_date': delete_date}) + for s in q: + Logger.log(["delete source", s[0]]) if not Options["No-Action"]: session.commit() # Delete files from the pool - old_files = session.query(PoolFile).filter(PoolFile.last_used <= delete_date) + old_files = session.query(ArchiveFile).filter(ArchiveFile.last_used <= delete_date).join(Archive) if max_delete is not None: old_files = old_files.limit(max_delete) print "Limiting removals to %d" % max_delete - for pf in old_files: - filename = os.path.join(pf.location.path, pf.filename) + for af in old_files: + filename = af.path if not os.path.exists(filename): - utils.warn("can not find '%s'." 
@@ -317,6 +318,16 @@ SELECT s.id, f.filename FROM source s, files f
     Logger.log(["total", count, utils.size_type(size)])
     print "Cleaned %d files, %s." % (count, utils.size_type(size))

+    # Delete entries in files no longer referenced by any archive
+    query = """
+       DELETE FROM files f
+        WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
+    """
+    session.execute(query)
+
+    if not Options["No-Action"]:
+        session.commit()
+
 ################################################################################

 def clean_maintainers(now_date, delete_date, max_delete, session):
@@ -385,8 +396,7 @@ def clean_empty_directories(session):
     count = 0

     cursor = session.execute(
-        "SELECT DISTINCT(path) FROM location WHERE type = :type",
-        {'type': 'pool'},
+        """SELECT DISTINCT(path) FROM archive"""
     )
     bases = [x[0] for x in cursor.fetchall()]

@@ -454,6 +464,8 @@ def main():
     clean_fingerprints(now_date, delete_date, max_delete, session)
     clean_empty_directories(session)

+    session.rollback()
+
     Logger.close()

 ################################################################################
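Reading the patch as a whole, each step now runs its statement inside the open transaction and commits only when Options["No-Action"] is unset, and the session.rollback() added to main() discards whatever is still uncommitted at the end, which matters in no-action mode. A condensed sketch of that pattern, using the orphan-row cleanup added above as the example and an invented function name (this is an interpretation of the flow, not code from the patch):

    def cleanup_unreferenced_files(session, no_action):
        # Delete files rows no longer referenced by any archive; commit only
        # when changes are wanted.  main()'s final rollback() undoes the work
        # in no-action mode and is a no-op after a commit otherwise.
        session.execute("""
            DELETE FROM files f
             WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                                WHERE af.file_id = f.id)""")
        if not no_action:
            session.commit()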