#!/usr/bin/env python
"""Scan filesystem for new files to back up.

For every active directory listed in the ``base`` table, run ``find`` over
it, compare the result with the ``file`` table, and insert a row (including
MD5 and SHA-1 digests) for every file that is not recorded yet.
"""

import hashlib
import os
import subprocess
import sys

# Number of bytes hashed per read; keeps memory use flat for large files.
_HASH_CHUNK_SIZE = 1024 * 1024

# Number of bytes requested from the underlying stream per refill.
_READ_SIZE = 8192

# Rows collected before they are flushed to the temporary table.
_BATCH_SIZE = 100

# ``find -printf`` format: date, time, size and path relative to the start
# directory, space separated, each record terminated by a NUL byte.
_FIND_FORMAT = "%TY-%Tm-%Td %TT %s %P\\0"


class file_hash:
    """Compute the MD5 and SHA-1 digests of a file.

    Attributes:
        md5: hexadecimal MD5 digest of the file contents.
        sha1: hexadecimal SHA-1 digest of the file contents.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """

    def __init__(self, filename):
        md = hashlib.md5()
        sh = hashlib.sha1()
        with open(filename, "rb") as fp:
            while True:
                # Bounded reads: the files being hashed may be far larger
                # than available memory.
                chunk = fp.read(_HASH_CHUNK_SIZE)
                if not chunk:
                    break
                md.update(chunk)
                sh.update(chunk)
        self.md5 = md.hexdigest()
        self.sha1 = sh.hexdigest()


class xreadchunks:
    """Iterate over delimiter-terminated chunks read from a file object.

    ``file`` only needs a ``read(size)`` method.  ``delimiter`` must have
    the same type (text or bytes) as the data ``read`` returns.  Chunks are
    returned without the delimiter.  Trailing data that is not terminated
    by a delimiter is discarded.

    Both plain iteration and strictly sequential indexing (``obj[0]``,
    ``obj[1]``, ...) are supported.
    """

    def __init__(self, file, delimiter):
        if not delimiter:
            # An empty delimiter matches everywhere and would never advance.
            raise ValueError("delimiter must not be empty")
        self.__file = file
        # Empty buffer of the same type as the delimiter (str or bytes).
        self.__buf = delimiter[:0]
        self.__pos = 0
        self.__delim = delimiter

    def __iter__(self):
        while True:
            try:
                chunk = self[self.__pos]
            except IndexError:
                return
            yield chunk

    def __getitem__(self, key):
        """Return chunk number ``key``, which must be the next unread one.

        Raises:
            RuntimeError: if ``key`` is not the next sequential index.
            IndexError: when the input is exhausted.
        """
        if self.__pos != key:
            raise RuntimeError("xreadchunks only supports sequential access")
        while True:
            ix = self.__buf.find(self.__delim)
            if ix != -1:
                ret = self.__buf[:ix]
                self.__buf = self.__buf[ix + len(self.__delim):]
                self.__pos += 1
                return ret
            fill = self.__file.read(_READ_SIZE)
            if not fill:
                # We lose the last chunk unless it is terminated by a
                # delimiter.
                raise IndexError(key)
            self.__buf += fill


def parse_find_record(record):
    """Split one ``find -printf`` record into (filename, mtime, size).

    ``record`` has the form ``"YYYY-MM-DD HH:MM:SS[.frac] SIZE PATH"`` and
    may be text or bytes.  The fractional part of the seconds, if present,
    is dropped; PATH may itself contain spaces.

    Returns:
        A tuple ``(filename, mtime, size)`` where ``mtime`` is the string
        ``"YYYY-MM-DD HH:MM:SS"`` and ``size`` is an int.

    Raises:
        ValueError: if the record does not have four fields or the size is
            not an integer.
    """
    if bytes is not str and isinstance(record, bytes):
        # Python 3: turn raw path bytes into text the same way the os
        # module does, so the name can be handed back to open().
        record = record.decode(sys.getfilesystemencoding(), "surrogateescape")
    fields = record.split(" ", 3)
    if len(fields) != 4:
        raise ValueError("malformed find record: %r" % (record, ))
    date, clock, size, filename = fields
    # Keep only HH:MM:SS; some find versions append fractional seconds.
    mtime = date + " " + clock[:8]
    return (filename, mtime, int(size))


def _load_cur_files(cursor, find_output, dir_id):
    """Fill the cur_files table from NUL-separated ``find`` output."""
    vals = []
    for record in xreadchunks(find_output, b"\0"):
        filename, mtime, filesize = parse_find_record(record)
        vals.append((filename, mtime, filesize, dir_id))
        if len(vals) >= _BATCH_SIZE:
            insert_tmp_files(cursor, vals)
            vals = []
    if vals:
        insert_tmp_files(cursor, vals)


def _record_new_files(cursor, inserter, dir_name, dir_id):
    """Hash every file in cur_files that is missing from file and insert it."""
    cursor.execute("LOCK TABLES file WRITE, base READ")
    try:
        cursor.execute("SELECT cur_files.filename, cur_files.size,"
                       "       cur_files.mtime"
                       " FROM cur_files"
                       " LEFT JOIN file"
                       "   USING (filename, mtime, size, dir_id)"
                       " WHERE file.filename IS NULL")
        while True:
            res = cursor.fetchmany()
            if not res:
                break
            for filename, filesize, mtime in res:
                h = file_hash(os.path.join(dir_name, filename))
                inserter.execute("INSERT INTO file"
                                 " (filename, dir_id, mtime, size,"
                                 "  md5sum, sha1sum, verified, broken)"
                                 " VALUES"
                                 " (%s, %s, %s, %s,"
                                 "  %s, %s, NOW(), %s)",
                                 (filename, dir_id, mtime, filesize,
                                  h.md5, h.sha1, 0))
    finally:
        # Never leave the tables locked, even if hashing a file failed.
        cursor.execute("UNLOCK TABLES")


def scan(DBH, dir_id):
    """Record all not-yet-known files below the directory ``dir_id``.

    Args:
        DBH: open database connection.
        dir_id: value of ``base.dir_id`` identifying the directory.

    Raises:
        ValueError: if ``dir_id`` is not present in ``base`` or ``find``
            produced a record that cannot be parsed.
    """
    cursor = DBH.cursor()
    inserter = DBH.cursor()
    try:
        cursor.execute("SELECT dir_name FROM base"
                       " WHERE dir_id = %s", (dir_id, ))
        row = cursor.fetchone()
        if row is None:
            raise ValueError("unknown dir_id: %r" % (dir_id, ))
        dir_name = row[0]

        cursor.execute("CREATE TEMPORARY TABLE cur_files ("
                       " filename varchar(255) not null,"
                       " mtime datetime not null,"
                       " size bigint not null,"
                       " dir_id bigint not null);")
        try:
            # Argument list instead of a shell string, so directory names
            # containing spaces or shell metacharacters are passed verbatim.
            find = subprocess.Popen(
                ["find", dir_name, "-type", "f", "-printf", _FIND_FORMAT],
                stdout=subprocess.PIPE)
            try:
                _load_cur_files(cursor, find.stdout, dir_id)
            finally:
                find.stdout.close()
                find.wait()
            _record_new_files(cursor, inserter, dir_name, dir_id)
        finally:
            cursor.execute("DROP TABLE cur_files")
    finally:
        inserter.close()
        cursor.close()


def insert_tmp_files(cursor, vals):
    """Insert (filename, mtime, size, dir_id) tuples into cur_files."""
    cursor.executemany("INSERT INTO cur_files (filename, mtime, size, dir_id)"
                       " VALUES (%s, %s, %s, %s)",
                       vals)


def scan_all(DBH):
    """Scan every active directory in ``base`` and update its scan times."""
    outer = DBH.cursor()
    inner = DBH.cursor()
    try:
        outer.execute("SELECT dir_id, first_scanned FROM base"
                      " WHERE active = 1")
        for dir_id, first_scanned in outer.fetchall():
            scan(DBH, dir_id)
            if first_scanned is None:
                inner.execute("UPDATE base"
                              " SET first_scanned = NOW(), last_scanned = NOW()"
                              " WHERE dir_id = %s", (dir_id, ))
            else:
                inner.execute("UPDATE base"
                              " SET last_scanned = NOW()"
                              " WHERE dir_id = %s", (dir_id, ))
            # DB-API connections do not autocommit by default; without this
            # the update can be lost on transactional storage engines.
            DBH.commit()
    finally:
        inner.close()
        outer.close()


def main():
    """Connect to the ``isoonline`` database and scan all directories."""
    # Imported here, where it is used, so that the hashing and parsing
    # helpers stay importable on hosts without the MySQL driver.
    import MySQLdb

    DBH = MySQLdb.connect(db='isoonline')
    try:
        scan_all(DBH)
    finally:
        DBH.close()


if __name__ == '__main__':
    main()