#!/usr/bin/env python
# Scan filesystem for new files to back up.
#
# NOTE: this module was recovered from a flattened git diff that also added a
# companion maintenance script, optimize.sql, with these statements:
#
#     optimize table base;
#     optimize table file;
#     optimize table media;
#     optimize table contents;

import hashlib
import os
import subprocess
import sys

# Output format handed to ``find -printf``: modification date, modification
# time, size in bytes and the path relative to the start directory, with
# every record terminated by a NUL byte (``\0`` is expanded by find itself).
_FIND_FORMAT = "%TY-%Tm-%Td %TT %s %P\\0"


class file_hash(object):
    """MD5 and SHA-1 hex digests of a file's contents.

    Attributes:
        md5: hexadecimal MD5 digest of the file.
        sha1: hexadecimal SHA-1 digest of the file.
    """

    def __init__(self, filename, chunk_size=1 << 20):
        """Hash the file at *filename*.

        The file is read in pieces of *chunk_size* bytes so that large files
        are never held in memory as a whole.

        Raises:
            IOError/OSError: if the file cannot be opened or read.
        """
        md = hashlib.md5()
        sh = hashlib.sha1()

        fp = open(filename, "rb")
        try:
            while True:
                chunk = fp.read(chunk_size)
                if not chunk:
                    break
                md.update(chunk)
                sh.update(chunk)
        finally:
            # Release the handle even when a read fails half way through.
            fp.close()

        self.md5 = md.hexdigest()
        self.sha1 = sh.hexdigest()


class xreadchunks(object):
    """Lazily split a file-like object into delimiter-terminated chunks.

    Instances implement the sequential ``__getitem__`` protocol, so they can
    be used directly in a ``for`` loop.  Only data that is followed by the
    delimiter is yielded: an unterminated tail at end of input is dropped.
    """

    def __init__(self, file, delimiter):
        """Wrap *file*; *delimiter* must be non-empty and of the same
        string type that ``file.read()`` returns."""
        if not delimiter:
            # An empty delimiter matches at offset 0 forever.
            raise ValueError("delimiter must not be empty")
        self.__file = file
        # Empty buffer of the delimiter's own type (text or bytes).
        self.__buf = delimiter[:0]
        self.__pos = 0
        self.__delim = delimiter

    def __getitem__(self, key):
        """Return chunk number *key*, which must be the next one in order.

        Raises:
            RuntimeError: if chunks are requested out of order.
            IndexError: when the input is exhausted (this ends a for loop).
        """
        if self.__pos != key:
            raise RuntimeError("xreadchunks only supports sequential access")
        while True:
            ix = self.__buf.find(self.__delim)
            if ix != -1:
                ret = self.__buf[:ix]
                self.__buf = self.__buf[ix + len(self.__delim):]
                self.__pos += 1
                return ret
            fill = self.__file.read(8192)
            if not fill:
                # End of input.  Whatever is left in the buffer was not
                # terminated by a delimiter and is intentionally discarded.
                raise IndexError
            self.__buf += fill


def _parse_find_record(record):
    """Split one ``find -printf`` record into (mtime, size, filename).

    The record has the shape ``DATE TIME SIZE PATH``.  Only the first three
    spaces are separators, so spaces inside PATH are preserved.  TIME is cut
    to ``HH:MM:SS`` because some find versions append fractional seconds.
    """
    day, clock, filesize, filename = record.split(" ", 3)
    return day + " " + clock[:8], filesize, filename


def _record_file(cursor, dir_id, dir_name, filename, mtime, filesize):
    """Insert a row into ``file`` unless an identical entry already exists.

    An entry is identified by (filename, dir_id, mtime, size).  The table is
    write-locked around the check-and-insert, and the lock is released even
    if hashing the file or the INSERT fails.
    """
    cursor.execute("LOCK TABLES file WRITE")
    try:
        cursor.execute("SELECT count(*) FROM file"
                       " WHERE filename = %s"
                       " AND dir_id = %s"
                       " AND mtime = %s"
                       " AND size = %s",
                       (filename, dir_id, mtime, filesize))
        if cursor.fetchone()[0] == 0:
            h = file_hash(os.path.join(dir_name, filename))
            cursor.execute("INSERT INTO file"
                           " (filename, dir_id, mtime, size,"
                           " md5sum, sha1sum, verified, broken)"
                           " VALUES"
                           " (%s, %s, %s, %s,"
                           " %s, %s, NOW(), %s)",
                           (filename, dir_id, mtime, filesize,
                            h.md5, h.sha1, 0))
    finally:
        cursor.execute("UNLOCK TABLES")


def scan(DBH, dir_id):
    """Record every regular file below the base directory *dir_id*.

    The directory name is looked up in the ``base`` table, ``find`` lists
    the regular files below it, and each file not yet known with the same
    name, mtime and size is hashed and inserted into ``file``.

    Args:
        DBH: an open DB-API connection (``%s`` parameter style).
        dir_id: value of ``base.dir_id`` identifying the directory to scan.

    Raises:
        ValueError: if no ``base`` row exists for *dir_id*.
    """
    cursor = DBH.cursor()
    try:
        cursor.execute("SELECT dir_name FROM base"
                       " WHERE dir_id = %s",
                       (dir_id, ))
        row = cursor.fetchone()
        if row is None:
            raise ValueError("no base directory with dir_id %r" % (dir_id, ))
        dir_name = row[0]

        # Argument list instead of a shell command line: a directory name
        # containing spaces or shell metacharacters is passed through intact.
        find = subprocess.Popen(["find", dir_name, "-type", "f",
                                 "-printf", _FIND_FORMAT],
                                stdout=subprocess.PIPE)
        try:
            for record in xreadchunks(find.stdout, b"\0"):
                if not isinstance(record, str):
                    # Python 3: the pipe delivers bytes; decode the way the
                    # os module does so odd file names still round-trip.
                    record = record.decode(sys.getfilesystemencoding(),
                                           "surrogateescape")
                mtime, filesize, filename = _parse_find_record(record)
                _record_file(cursor, dir_id, dir_name,
                             filename, mtime, filesize)
        finally:
            find.stdout.close()
            # Reap the child; its exit status is ignored, as before.
            find.wait()
    finally:
        cursor.close()


def scan_all(DBH):
    """Scan every active base directory and stamp its scan times.

    For each ``base`` row with ``active = 1`` the directory is scanned, then
    ``last_scanned`` is set to NOW(); ``first_scanned`` is set as well when
    it was still NULL.
    """
    outer = DBH.cursor()
    inner = DBH.cursor()
    try:
        outer.execute("SELECT dir_id, first_scanned FROM base"
                      " WHERE active = 1")
        for dir_id, first_scanned in outer.fetchall():
            scan(DBH, dir_id)
            if first_scanned is None:
                inner.execute("UPDATE base"
                              " SET first_scanned = NOW(), last_scanned = NOW()"
                              " WHERE dir_id = %s",
                              (dir_id, ))
            else:
                inner.execute("UPDATE base"
                              " SET last_scanned = NOW()"
                              " WHERE dir_id = %s",
                              (dir_id, ))
    finally:
        inner.close()
        outer.close()


def main():
    """Connect to the ``isoonline`` database and scan all active directories."""
    # Imported here so the helpers above can be imported (and tested)
    # on machines that do not have the MySQL driver installed.
    import MySQLdb

    DBH = MySQLdb.connect(db='isoonline')
    try:
        scan_all(DBH)
        # Harmless on non-transactional tables; on transactional ones it
        # keeps the final UPDATE from being rolled back at disconnect.
        DBH.commit()
    finally:
        DBH.close()


if __name__ == '__main__':
    main()