#!/usr/bin/env python
"""Scan filesystem for new files to back up."""

import hashlib
import os
import subprocess
import sys


# Output format handed to find(1) -printf: modification date, modification
# time, size in bytes and path relative to the starting directory.  The
# trailing "\\0" reaches find as backslash-zero, which it expands to a NUL
# byte, so records stay unambiguous even for names containing newlines.
_FIND_FORMAT = "%TY-%Tm-%Td %TT %s %P\\0"


class file_hash(object):
    """Compute the MD5 and SHA-1 digests of a file's contents.

    After construction the hex digests are available as the ``md5`` and
    ``sha1`` attributes.
    """

    # Bytes read per iteration; keeps memory use bounded for huge files.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self, filename):
        """Hash the file at ``filename``.

        Raises IOError/OSError if the file cannot be opened or read.
        """
        md = hashlib.md5()
        sh = hashlib.sha1()
        with open(filename, "rb") as fp:
            while True:
                chunk = fp.read(self.CHUNK_SIZE)
                if not chunk:
                    break
                md.update(chunk)
                sh.update(chunk)
        self.md5 = md.hexdigest()
        self.sha1 = sh.hexdigest()


class xreadchunks(object):
    """Iterate over delimiter-separated chunks read from a file object.

    Iteration relies on the sequence protocol: ``for`` calls
    ``__getitem__`` with 0, 1, 2, ... until IndexError is raised.  Only
    that sequential access pattern is supported.

    ``file`` needs a ``read(size)`` method; ``delimiter`` must be a
    non-empty string of the same type (text or bytes) that ``read``
    returns.
    """

    def __init__(self, file, delimiter):
        if not delimiter:
            # An empty delimiter would match at offset 0 forever.
            raise ValueError("delimiter must not be empty")
        self.__file = file
        self.__delim = delimiter
        # Empty buffer of the delimiter's own type, so both text and
        # binary streams work.
        self.__buf = delimiter[:0]
        self.__pos = 0
        self.__eof = False

    def __getitem__(self, key):
        """Return chunk number ``key``, which must be the next one in order.

        Raises RuntimeError on out-of-order access and IndexError once the
        input is exhausted.
        """
        if self.__pos != key:
            raise RuntimeError("xreadchunks only supports sequential access")
        while True:
            ix = self.__buf.find(self.__delim)
            if ix != -1:
                ret = self.__buf[:ix]
                self.__buf = self.__buf[ix + len(self.__delim):]
                self.__pos += 1
                return ret
            if not self.__eof:
                fill = self.__file.read(8192)
                if fill:
                    self.__buf += fill
                    continue
                self.__eof = True
            if self.__buf:
                # Trailing data without a final delimiter is still a chunk.
                ret = self.__buf
                self.__buf = ret[:0]
                self.__pos += 1
                return ret
            raise IndexError(key)


def _fs_bytes(path):
    """Return ``path`` as a byte string so it can be joined with find output."""
    if isinstance(path, bytes):
        return path
    if hasattr(os, "fsencode"):  # Python 3
        return os.fsencode(path)
    return path.encode(sys.getfilesystemencoding() or "utf-8")


def _parse_find_record(record):
    """Split one find record into ``(mtime, size, relative_path)``.

    Returns None when the record does not have the four space-separated
    fields produced by ``_FIND_FORMAT``.
    """
    # Split at most three times: the path is last and may contain spaces.
    fields = record.split(b" ", 3)
    if len(fields) != 4:
        return None
    date, time, size, name = fields
    # Newer GNU find appends fractional seconds to %TT; keep HH:MM:SS only.
    return date + b" " + time[:8], size, name


def _record_file(cursor, dir_id, base_path, mtime, filesize, filename):
    """Insert one file into the ``file`` table unless this version is known.

    A version counts as known when a row with the same filename, dir_id,
    mtime and size exists.  The table stays write-locked across the check
    and the insert, and the lock is released even if hashing or SQL fails.
    """
    cursor.execute("LOCK TABLES file WRITE")
    try:
        cursor.execute("SELECT count(*) FROM file"
                       " WHERE filename = %s"
                       " AND dir_id = %s"
                       " AND mtime = %s"
                       " AND size = %s",
                       (filename, dir_id, mtime, filesize))
        if cursor.fetchone()[0] != 0:
            return
        try:
            h = file_hash(os.path.join(base_path, filename))
        except EnvironmentError as exc:
            # The file may have vanished or become unreadable since find
            # listed it; skip it so one bad file does not abort the scan.
            sys.stderr.write("skipping %r: %s\n" % (filename, exc))
            return
        cursor.execute("INSERT INTO file"
                       " (filename, dir_id, mtime, size,"
                       " md5sum, sha1sum, verified, broken)"
                       " VALUES"
                       " (%s, %s, %s, %s,"
                       " %s, %s, NOW(), %s)",
                       (filename, dir_id, mtime, filesize,
                        h.md5, h.sha1, 0))
    finally:
        cursor.execute("UNLOCK TABLES")


def scan(DBH, dir_id):
    """Record every new or changed regular file below one base directory.

    The directory name is looked up in the ``base`` table by ``dir_id``,
    ``find`` lists all regular files below it, and each file not yet
    present in the ``file`` table with the same mtime and size is hashed
    and inserted.

    DBH is a DB-API connection using the ``%s`` parameter style.
    Raises ValueError if ``dir_id`` has no row in ``base``.
    """
    cursor = DBH.cursor()
    try:
        cursor.execute("SELECT dir_name FROM base"
                       " WHERE dir_id = %s", (dir_id, ))
        row = cursor.fetchone()
        if row is None:
            raise ValueError("no base directory with dir_id %r" % (dir_id, ))
        dir_name = row[0]
        base_path = _fs_bytes(dir_name)

        # An argument list (no shell) keeps spaces and shell metacharacters
        # in dir_name from being interpreted.
        find = subprocess.Popen(
            ["find", dir_name, "-type", "f", "-printf", _FIND_FORMAT],
            stdout=subprocess.PIPE)
        try:
            for record in xreadchunks(find.stdout, b"\0"):
                parsed = _parse_find_record(record)
                if parsed is None:
                    sys.stderr.write("ignoring malformed find record %r\n"
                                     % (record, ))
                    continue
                mtime, filesize, filename = parsed
                _record_file(cursor, dir_id, base_path,
                             mtime, filesize, filename)
        finally:
            find.stdout.close()
            status = find.wait()
        if status != 0:
            sys.stderr.write("find exited with status %d for %r\n"
                             % (status, dir_name))
    finally:
        cursor.close()


def scan_all(DBH):
    """Scan every active base directory and stamp its scan times.

    ``first_scanned`` is set only on a directory's first scan;
    ``last_scanned`` is refreshed on every scan.
    """
    cursor = DBH.cursor()
    try:
        cursor.execute("SELECT dir_id, first_scanned FROM base"
                       " WHERE active = 1")
        for dir_id, first_scanned in cursor.fetchall():
            scan(DBH, dir_id)
            if first_scanned is None:
                cursor.execute("UPDATE base"
                               " SET first_scanned = NOW(),"
                               " last_scanned = NOW()"
                               " WHERE dir_id = %s", (dir_id, ))
            else:
                cursor.execute("UPDATE base"
                               " SET last_scanned = NOW()"
                               " WHERE dir_id = %s", (dir_id, ))
            # DB-API connections do not autocommit by default; without
            # this the update is lost on transactional tables.
            DBH.commit()
    finally:
        cursor.close()


def main():
    """Connect to the ``isoonline`` database and scan all active directories."""
    # Imported here, by its only user, so the hashing and parsing helpers
    # stay importable on machines without the MySQL driver.
    import MySQLdb

    DBH = MySQLdb.connect(db='isoonline')
    try:
        scan_all(DBH)
    finally:
        DBH.close()


if __name__ == '__main__':
    main()