From efce8effb4f2551a0b9d9fa68dee11d8af14c2bd Mon Sep 17 00:00:00 2001
From: Per Cederqvist <ceder@lysator.liu.se>
Date: Tue, 24 Oct 2006 10:08:57 +0000
Subject: [PATCH] LYSrdiff now randomizes the backup order a bit less.

* distribute-tasks (ordered_tasks): New global variable.
  (read_tasks): Store the job in it as well.
  (read_new_tasks): Ditto.
  (write_task_lists): Retain roughly the old order, but move a few
    random jobs to the front of the job queue.
---
 ChangeLog        | 10 ++++++++++
 distribute-tasks | 18 ++++++++++++++++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a6089a8..7fbf658 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2006-10-24  Per Cederqvist  <ceder@sedesopim.lysator.liu.se>
+
+	LYSrdiff now randomizes the backup order a bit less.
+
+	* distribute-tasks (ordered_tasks): New global variable.
+	(read_tasks): Store the job in it as well.
+	(read_new_tasks): Ditto.
+	(write_task_lists): Retain roughly the old order, but move a few
+	random jobs to the front of the job queue.
+
 2006-10-23  Per Cederqvist  <ceder@lysator.liu.se>
 
 	LYSrdiff can now backup up to separate partitions, creates more
diff --git a/distribute-tasks b/distribute-tasks
index 0675682..6983029 100755
--- a/distribute-tasks
+++ b/distribute-tasks
@@ -52,6 +52,9 @@ def newtasks():
 # value: JobInfo
 tasks_per_source = {}
 
+# value: JobInfo
+ordered_tasks = []
+
 fatal = False
 
 def tasklist_file(lysrdiffpart):
@@ -75,6 +78,7 @@ def read_tasks(lysrdiffpart):
             fatal = True
                              
         tasks_per_source[info.source()] = info
+	ordered_tasks.append(info)
 
 def read_new_tasks():
     new_found = False
@@ -83,13 +87,23 @@ def read_new_tasks():
         if (info.host(), info.directory()) not in tasks_per_source:
             info.set_lysrdiffpart(newtasks())
             tasks_per_source[(info.host(), info.directory())] = info
+	    ordered_tasks.append(info)
             new_found = True
 
     return new_found
 
 def write_task_lists():
-    jobs = tasks_per_source.values()
-    random.shuffle(jobs)
+    jobs = ordered_tasks[:]
+
+    # Pick a few lucky jobs and move them to the front of the queue.
+    # This way, we get roughly the same order as on the previous
+    # backup (which is good because each job will then be backed up
+    # with approximately the same interval) but no job is (on average)
+    # favoured over any other job.
+    for x in range(1 + int(0.005 * len(jobs))):
+	lucky_ix = random.randrange(0, len(jobs))
+	jobs = [jobs[lucky_ix]] + jobs[:lucky_ix] + jobs[lucky_ix+1:]
+
     files = {}
     for job in jobs:
         if job.lysrdiffpart() not in files:
-- 
GitLab