From: Joerg Jaspert Date: Mon, 21 Apr 2008 14:54:18 +0000 (+0200) Subject: Add dump expire script and use it X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3de21bfb588537661d127b8560342b65fccd55cf;p=dak Add dump expire script and use it --- diff --git a/ChangeLog b/ChangeLog index a66eca56..0e47ba1c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2008-04-21 Joerg Jaspert + * scripts/debian/expire_dumps: New script, expires old database + dumps, using a scheme to keep more of the recent dumps. + + * config/debian/cron.daily: Use the new script. Also - compress + all files older than 7 days, instead of 30. + * dak/process_accepted.py (install): Do not break if a source/maintainer combination is already in src_uploaders, "just" warn us. diff --git a/config/debian/cron.daily b/config/debian/cron.daily index 215bdb59..234bb477 100755 --- a/config/debian/cron.daily +++ b/config/debian/cron.daily @@ -154,7 +154,7 @@ pg_dump projectb > $POSTDUMP ################################################################################ -#TS=$(($TS+1)); echo Archive maintenance timestamp $TS: $(date +%X) +TS=$(($TS+1)); echo Archive maintenance timestamp $TS: $(date +%X) # Vacuum the database # (JJ, 20-04-2008) disabled, as we have autovacuum set to on in postgres. @@ -163,6 +163,9 @@ pg_dump projectb > $POSTDUMP # whose purpose is to automate the execution of VACUUM and ANALYZE commands." # echo "VACUUM; VACUUM ANALYZE;" | psql projectb 2>&1 | grep -v "^NOTICE: Skipping.*only table owner can VACUUM it$" +echo "Expiring old database dumps..." +(cd $base/backup; $scriptsdir/expire_dumps -d . 
-p -f "dump_*") + ################################################################################ TS=$(($TS+1)); echo Archive maintenance timestamp $TS: $(date +%X) @@ -206,10 +209,10 @@ apt-ftparchive -q clean apt.conf TS=$(($TS+1)); echo Archive maintenance timestamp $TS: $(date +%X) -# Compress psql backups older than a month, but no more than 20 of them +# Compress psql backups older than a week, but no more than 20 of them (cd $base/backup/ - find -maxdepth 1 -mindepth 1 -type f -name 'dump_*' \! -name '*.bz2' \! -name '*.gz' -mtime +30 | + find -maxdepth 1 -mindepth 1 -type f -name 'dump_*' \! -name '*.bz2' \! -name '*.gz' -mtime +7 | sort | head -n20 | while read dumpname; do echo "Compressing $dumpname" bzip2 -9 "$dumpname" diff --git a/scripts/debian/expire_dumps b/scripts/debian/expire_dumps new file mode 100755 index 00000000..b09f88dd --- /dev/null +++ b/scripts/debian/expire_dumps @@ -0,0 +1,143 @@ +#!/usr/bin/python + +# Copyright (C) 2007 Florian Reitmeir +# Copyright (C) 2008 Joerg Jaspert + +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# requires: python-dateutil
# NOTE(review): nothing in the code below imports dateutil; this
# requirement looks stale -- confirm before relying on it.

import glob
import os
import sys
import time
import datetime
import re
from datetime import datetime
from datetime import timedelta
from optparse import OptionParser

# Retention scheme, applied to the dumps from newest to oldest.
#
# A rule stays in effect for files younger than 'days' days.  While a rule
# is in effect, a file is removed when it is less than 'interval' days older
# than the last file that was kept.  Once the last rule's window has been
# passed, that last rule simply stays in effect.
#
# NOTE(review): the original comments described the 31 day rule as
# "1 each 7th day", but the code uses an interval of 1 day; confirm which
# of the two was intended.
RULES = [
    {'days': 14, 'interval': 0},      # up to 14 days old: keep every dump
    {'days': 31, 'interval': 1},      # up to 31 days old: keep dumps >= 1 day apart
    {'days': 365, 'interval': 31},    # up to 365 days old: keep dumps >= 31 days apart
    {'days': 3650, 'interval': 365},  # up to 3650 days old: keep dumps >= 365 days apart
]

TODAY = datetime.today()
VERBOSE = False
NOACTION = False
PRINT = False
PREFIX = ''
PATH = ''

# Basename of a dump, e.g. dump_2006.05.02-11:52:01 or
# dump_2006.05.02-11:52:01.bz2.  Only names of this shape are ever considered
# for expiry, whatever glob pattern was given on the command line.
_DUMP_NAME_RE = re.compile(
    r'^dump_([0-9]{4})\.([0-9]{2})\.([0-9]{2})'
    r'-([0-9]{2}):([0-9]{2}):([0-9]{2})(?:\.bz2)?$')


def all_files(pattern, search_path, pathsep=os.pathsep):
    """Yield every path matching the glob `pattern` in `search_path`.

    `search_path` is split on `pathsep`, and `pattern` is globbed inside
    each of the resulting directories in turn.
    """
    for path in search_path.split(pathsep):
        for match in glob.glob(os.path.join(path, pattern)):
            yield match


def parse_file_dates(list):
    """Return a {'name': path, 'date': datetime} dict for every dump file.

    `list` is an iterable of paths (the parameter name shadows the builtin
    and is kept only for backward compatibility).  A path is accepted when
    its basename looks like dump_YYYY.MM.DD-HH:MM:SS with an optional .bz2
    suffix, in any directory.  Paths that do not match, or whose timestamp
    is not a valid date, are skipped and are therefore never expired.
    """
    out = []
    for name in list:
        m = _DUMP_NAME_RE.match(os.path.basename(name))
        if not m:
            continue
        try:
            stamp = datetime(*[int(part) for part in m.groups()])
        except ValueError:
            # Well-formed name but impossible date (e.g. month 13): leave
            # the file alone rather than abort the whole run.
            continue
        out.append({'name': name, 'date': stamp})
    return out


def prepare_rules(rules):
    """Convert the day counts in `rules` into timedelta objects.

    Returns a new list of {'days': timedelta, 'interval': timedelta} dicts
    in the same order as `rules`.
    """
    return [{'days': timedelta(days=rule['days']),
             'interval': timedelta(days=rule['interval'])}
            for rule in rules]


def expire(rules, list, today=None):
    """Remove the dumps that the retention `rules` no longer keep.

    rules -- list of {'days': int, 'interval': int} dicts, see RULES.
    list  -- dicts as returned by parse_file_dates(); it is not modified
             (the parameter name shadows the builtin and is kept only for
             backward compatibility).
    today -- reference time for the rule windows, defaults to TODAY.

    The newest dump is always kept.  Every other dump is checked, newest
    first, against the rule whose window it falls into, and is removed when
    it is closer than that rule's interval to the last dump that was kept.
    The global flags apply: VERBOSE and PRINT control the output, NOACTION
    suppresses the actual os.unlink().

    Returns the names of the expired files in the order they were handled.
    """
    if today is None:
        today = TODAY

    removed = []
    # Work on a sorted copy: the algorithm needs newest-first order and must
    # not eat the first element of the caller's list.
    entries = sorted(list, key=lambda entry: entry['date'], reverse=True)
    pending = prepare_rules(rules)
    if not entries or not pending:
        return removed

    rule = pending.pop(0)
    last = entries[0]

    for entry in entries[1:]:
        if VERBOSE:
            print("current file to expire: " + entry['name'])
            print(entry['date'])

        # Advance to the rule whose window contains this file.  This has to
        # be a loop: after a gap in the dumps a file may lie several windows
        # further back than its predecessor.
        while pending and entry['date'] < (today - rule['days']):
            if VERBOSE:
                print("move to next rule")
            rule = pending.pop(0)

        if (last['date'] - entry['date']) < rule['interval']:
            if VERBOSE:
                print("unlink file:" + entry['name'])
            if PRINT:
                print(entry['name'])
            if not NOACTION:
                os.unlink(entry['name'])
            removed.append(entry['name'])
        else:
            last = entry
            if VERBOSE:
                print("kept file:" + entry['name'])

    return removed


def main():
    """Parse the command line and expire the dumps it selects."""
    global VERBOSE, NOACTION, PRINT

    parser = OptionParser()
    parser.add_option("-d", "--directory", dest="directory",
                      help="directory name", metavar="Name")
    parser.add_option("-f", "--pattern", dest="pattern",
                      help="Pattern maybe some glob", metavar="*.backup")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                      help="verbose")
    parser.add_option("-n", "--no-action", action="store_true", dest="noaction", default=False,
                      help="just prints what would be done, this implies verbose")
    parser.add_option("-p", "--print", action="store_true", dest="printfiles", default=False,
                      help="just print the filenames that should be deleted, this forbids verbose")

    (options, args) = parser.parse_args()

    if not options.directory:
        parser.error("no directory to check given")

    # Without a pattern os.path.join() would fail with a traceback.
    if not options.pattern:
        parser.error("no pattern to match given")

    if options.noaction:
        VERBOSE = True
        NOACTION = True

    if options.verbose:
        VERBOSE = True

    # -p wins over -v and -n as far as verbosity is concerned.
    if options.printfiles:
        VERBOSE = False
        PRINT = True

    files = sorted(all_files(options.pattern, options.directory), reverse=True)
    if not files:
        return

    expire(RULES, parse_file_dates(files))


if __name__ == "__main__":
    main()