From ca90b8ddbd4ea771b5d47c97a1bd1ecae9ae60de Mon Sep 17 00:00:00 2001 From: Per Cederqvist <ceder@lysator.liu.se> Date: Fri, 10 Nov 2006 11:25:19 +0000 Subject: [PATCH] Handle warnings and parallel backups. Better handling of warnings. * backup-all: New option: --retry. Only run df on the specified partitions. Added handling of a "hold" file. * backup-one-task: New option: --retry. Added -warn, -nowarn and -neverwarnfree status files. * lysrdiff-status: Print number of warnings per partition, and list all warnings. Support starting two "backup-all" at the same time. * distribute-tasks (write_task_lists): New argument: partitions. Only output data for the specified partitions. Create a lock directory to ensure that two distribute-tasks processes cannot clobber the same file. (main): Expect partition number on the command line. Minor. * Makefile (install-one-task): New target, extracted from install. --- ChangeLog | 20 ++++++++++++++++++++ Makefile | 6 ++++-- backup-all | 38 +++++++++++++++++++++++++++++--------- backup-one-task | 26 +++++++++++++++++++++++++- distribute-tasks | 19 +++++++++++++------ lysrdiff-status | 5 +++++ 6 files changed, 96 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index 808b1d4..83ad99c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2006-11-10 Per Cederqvist <ceder@sedesopim.lysator.liu.se> + + Better handling of warnings. + * backup-all: New option: --retry. Only run df on the specified + partitions. Added handling of a "hold" file. + * backup-one-task: New option: --retry. Added -warn, -nowarn and + -neverwarnfree status files. + * lysrdiff-status: Print number of warnings per partition, and + list all warnings. + + Support starting two "backup-all" at the same time. + * distribute-tasks (write_task_lists): New argument: partitions. + Only output data for the specified partitions. Create a lock + directory to ensure that two distribute-tasks processes cannot + clobber the same file. 
+ (main): Expect partition number on the command line. + + Minor. + * Makefile (install-one-task): New target, extracted from install. + 2006-10-30 Per Cederqvist <ceder@sedesopim.lysator.liu.se> Progress indication. Lock files. More output. Run ssh in batch diff --git a/Makefile b/Makefile index 4d18b16..7a48eb9 100644 --- a/Makefile +++ b/Makefile @@ -2,9 +2,11 @@ BINDIR=/nobackup/backup.lysator/bin all:; -install: +install: install-one-task cp backup-all $(BINDIR)/ - cp backup-one-task $(BINDIR)/ cp distribute-tasks $(BINDIR)/ cp fetch-backup-work $(BINDIR)/ cp lysrdiff-status $(BINDIR)/ + +install-one-task: + cp backup-one-task $(BINDIR)/ diff --git a/backup-all b/backup-all index 51994f7..7d21088 100755 --- a/backup-all +++ b/backup-all @@ -1,10 +1,11 @@ #!/bin/sh usage () { - echo $0: usage: $0 '[ --failed ] partno...' >&2 + echo $0: usage: $0 '[ --failed | --retry ] partno...' >&2 } failed= +retry= while true do @@ -12,6 +13,9 @@ do x--failed) shift failed=--failed;; + x--retry) + shift + retry=--retry;; x--*) usage exit 1;; @@ -27,11 +31,15 @@ then exit 1 fi -df -h /lysrdiff/0/perm | sed -n 1p -for lysrdiffpart in $PARTS -do - df -h /lysrdiff/$lysrdiffpart/perm | sed 1d -done +rundf () { + df -h /lysrdiff/0/perm | sed -n 1p + for lysrdiffpart in $PARTS + do + df -h /lysrdiff/$lysrdiffpart/perm | sed 1d + done +} + +rundf for lysrdiffpart in $PARTS do @@ -42,7 +50,7 @@ do fi done -/nobackup/backup.lysator/bin/distribute-tasks +/nobackup/backup.lysator/bin/distribute-tasks $PARTS total=0 for lysrdiffpart in $PARTS @@ -55,17 +63,29 @@ for lysrdiffpart in $PARTS do while read category subcategory server serverpath do + if [ -f /nobackup/backup.lysator/etc/stop ] then echo Stop file found. Exiting. >&2 exit 1 fi + + if [ -f /nobackup/backup.lysator/etc/hold ] + then + echo Hold file found. Holding... >&2 + while [ -f /nobackup/backup.lysator/etc/hold ] + do + sleep 1 + done + echo Hold file disappeared. Continuing. 
>&2 + fi + /nobackup/backup.lysator/bin/backup-one-task \ - $failed \ + $failed $retry \ $lysrdiffpart "$category" "$subcategory" "$server" "$serverpath" \ "$ctr/$total $category $subcategory" ctr=`expr $ctr + 1` done < /lysrdiff/$lysrdiffpart/perm/lysrdiff/tasks done -df -h /lysrdiff/0/perm /lysrdiff/1/perm +rundf diff --git a/backup-one-task b/backup-one-task index 1925881..d19eba4 100755 --- a/backup-one-task +++ b/backup-one-task @@ -5,9 +5,11 @@ usage () { echo Example: $0 0 home ceder inservitus /export/home/ceder "1/123" >&2 echo Recognized options: >&2 echo ' --failed Only run failed backups' >&2 + echo ' --retry Only run backups with output from rdiff-backup' >&2 } failed=0 +retry=0 while [ $# -gt 1 ] do @@ -15,6 +17,9 @@ do x--failed) shift failed=1;; + x--retry) + shift + retry=1;; x--*) usage exit 1;; @@ -64,6 +69,12 @@ then exit 0 fi +if [ $retry = 1 ] && [ "`(wc -c < \"$rdifflogfile\") 2>/dev/null`" = 0 ] +then + rmdir "$lockdir" + exit 0 +fi + if [ "$server" = manhattan ] then rmdir "$lockdir" @@ -133,12 +144,25 @@ then rm -f "$base"/last-failure rm -f "$statebase"-fail - if [ `wc -c < "$rdifflogfile"` != 0 ] + if [ `wc -c < "$rdifflogfile"` = 0 ] then + touch "$statebase"-nowarn + touch "$base"/last-nowarn-backup + rm -f "$statebase"-neverwarnfree + rm -f "$statebase"-warn + else echo ${msg}: WARNING: output from rdiff-backup: echo sed 's/^/ /' "$rdifflogfile" echo + if [ ! -f "$statebase"-warn ] + then + touch "$statebase"-warn + fi + if [ ! -f "$statebase"-nowarn ] && [ ! 
-f "$statebase"-neverwarnfree ] + then + touch "$statebase"-neverwarnfree + fi fi else mv "$base"/backup-attempt-start "$base"/last-failure diff --git a/distribute-tasks b/distribute-tasks index 6b41b48..0b68c3f 100755 --- a/distribute-tasks +++ b/distribute-tasks @@ -3,6 +3,7 @@ import os import sys import random +import sets ROOT = "/lysrdiff" @@ -98,7 +99,7 @@ def read_new_tasks(): ordered_tasks = new_tasks + ordered_tasks return new_found -def write_task_lists(): +def write_task_lists(partitions): jobs = ordered_tasks[:] # Pick a few lucky jobs and move them to the front of the queue. @@ -112,23 +113,29 @@ def write_task_lists(): files = {} for job in jobs: - if job.lysrdiffpart() not in files: - files[job.lysrdiffpart()] = file( - tasklist_file(job.lysrdiffpart()) + ".new", "w") - files[job.lysrdiffpart()].write(job.task_desc() + "\n") + part = job.lysrdiffpart() + if part in partitions: + if part not in files: + fn = tasklist_file(job.lysrdiffpart()) + os.mkdir(fn + ".lock") + files[part] = file(fn + ".new", "w") + files[part].write(job.task_desc() + "\n") for lysrdiffpart, fp in files.items(): fp.close() fn = tasklist_file(lysrdiffpart) os.rename(fn + ".new", fn) + os.rmdir(fn + ".lock") def main(): global fatal + partitions = sets.Set([int(x) for x in sys.argv[1:]]) + for lysrdiffpart in range(2): read_tasks(lysrdiffpart) read_new_tasks() if not fatal: - write_task_lists() + write_task_lists(partitions) if __name__ == '__main__': main() diff --git a/lysrdiff-status b/lysrdiff-status index 00039be..81257e4 100755 --- a/lysrdiff-status +++ b/lysrdiff-status @@ -17,10 +17,15 @@ do echo -n ' '1day: `find $part/state -name \*-start -ctime 1 -print|wc -l` echo -n ' 'Stale: `find $part/state -name \*-start -ctime +1 -print|wc -l` echo -n ' 'Tot: `ls $part/state/*-end|wc -l` + echo -n ' 'Warn: `find $part/state -name \*-warn -print|wc -l` echo ' 'Err: `ls $part/state/*-fail 2>/dev/null|wc -l` done echo +echo Warnings: +echo +grep -h -- '-warn$' $statecache-* | sort 
+echo echo Failures: echo grep -h -- '-fail$' $statecache-* | sort -- GitLab