#!/usr/bin/env python
# Scan filesystem for new files to back up.

import hashlib
import md5
import os
import sha
import subprocess
import sys

import MySQLdb

class file_hash:
    """Compute the MD5 and SHA-1 digests of a file's contents.

    After construction the lowercase hexadecimal digests are available as
    the ``md5`` and ``sha1`` attributes.
    """

    def __init__(self, filename, chunk_size=65536):
        """Hash the file at `filename`, reading `chunk_size` bytes at a time.

        Raises ValueError if `chunk_size` is not positive; errors from
        opening or reading the file propagate unchanged.
        """
        if chunk_size < 1:
            # read(0) returns nothing and read(-1) slurps the whole file;
            # neither is what a caller asking for chunked reads wants.
            raise ValueError("chunk_size must be positive")

        md = hashlib.md5()
        sh = hashlib.sha1()

        fp = open(filename, "rb")
        try:
            # Feed both digests from a single pass over the file, in bounded
            # chunks so large files are never held in memory at once.
            while 1:
                chunk = fp.read(chunk_size)
                if not chunk:
                    break
                md.update(chunk)
                sh.update(chunk)
        finally:
            # Release the descriptor even when a read fails part-way.
            fp.close()

        self.md5 = md.hexdigest()
        self.sha1 = sh.hexdigest()
class xreadchunks:
    """Sequential reader that splits a file-like object on a delimiter.

    Instances support the legacy sequence-iteration protocol, so they can
    be used directly in a ``for`` loop: item 0, 1, 2, ... are requested in
    order and IndexError marks the end of the data.
    """

    def __init__(self, file, delimiter):
        """Read chunks from `file` (anything with read()) split on `delimiter`."""
        self.__delim = delimiter
        self.__file = file
        self.__pos = 0
        self.__buf = ""

    def __getitem__(self, key):
        """Return chunk number `key`, which must be the next one in sequence.

        Raises RuntimeError on out-of-order access and IndexError once the
        underlying file is exhausted.
        """
        # Only strictly sequential access is supported.
        if key != self.__pos:
            raise RuntimeError

        cut = self.__buf.find(self.__delim)
        while cut == -1:
            # No complete chunk buffered yet: pull in more data.
            more = self.__file.read(8192)
            if more == "":
                # End of input.  Whatever is still buffered has no trailing
                # delimiter and is discarded rather than returned.
                raise IndexError
            self.__buf = self.__buf + more
            cut = self.__buf.find(self.__delim)

        chunk = self.__buf[:cut]
        # Drop the returned chunk and its delimiter from the buffer.
        self.__buf = self.__buf[cut + len(self.__delim):]
        self.__pos = self.__pos + 1
        return chunk

def _parse_find_record(record):
    """Split one record printed by find into (mtime, filesize, filename).

    A record has the form "YYYY-MM-DD HH:MM:SS[.fraction] SIZE PATH".  Only
    the first three spaces are treated as separators, so PATH may itself
    contain spaces.  Raises ValueError if the record has fewer fields.
    """
    date, time_of_day, filesize, filename = record.split(" ", 3)
    # GNU find may append fractional seconds to %TT; keep only HH:MM:SS so
    # the timestamp has the same "YYYY-MM-DD HH:MM:SS" shape either way.
    return date + " " + time_of_day[:8], filesize, filename


def scan(DBH, dir_id):
    """Record every not-yet-known regular file below one base directory.

    Looks up the directory name for `dir_id` in the `base` table, lists all
    regular files below it with find(1), and for each file that has no row
    in the `file` table with the same name, directory, mtime and size,
    hashes the file and inserts a new row.

    DBH is an open database connection; dir_id is the key of the row in
    `base` to scan.  Raises LookupError if no such row exists.
    """
    cursor = DBH.cursor()
    try:
        cursor.execute("SELECT dir_name FROM base"
                       " WHERE dir_id = %s",
                       (dir_id, ))
        row = cursor.fetchone()
        if row is None:
            raise LookupError("no base directory with dir_id %r" % (dir_id, ))
        dir_name = row[0]

        # Pass an argument list rather than a shell string so directory names
        # containing spaces or shell metacharacters are handed to find
        # verbatim.  find itself expands the trailing \0 to a NUL byte.
        find = subprocess.Popen(
            ["find", dir_name, "-type", "f",
             "-printf", "%TY-%Tm-%Td %TT %s %P\\0"],
            stdout=subprocess.PIPE)
        try:
            for line in xreadchunks(find.stdout, "\0"):
                mtime, filesize, filename = _parse_find_record(line)

                # The existence check and the insert happen under one write
                # lock on `file`; always release it, even if hashing fails
                # (for instance because the file vanished after find saw it).
                cursor.execute("LOCK TABLES file WRITE")
                try:
                    cursor.execute("SELECT count(*) FROM file"
                                   " WHERE filename = %s"
                                   " AND dir_id = %s"
                                   " AND mtime = %s"
                                   " AND size = %s",
                                   (filename, dir_id, mtime, filesize))
                    count = cursor.fetchone()[0]
                    if count == 0:
                        h = file_hash(os.path.join(dir_name, filename))
                        cursor.execute("INSERT INTO file"
                                       " (filename, dir_id, mtime, size,"
                                       "  md5sum, sha1sum, verified, broken)"
                                       " VALUES"
                                       " (%s, %s, %s, %s,"
                                       "  %s, %s, NOW(), %s)",
                                       (filename, dir_id, mtime, filesize,
                                        h.md5, h.sha1, 0))
                finally:
                    cursor.execute("UNLOCK TABLES")
        finally:
            # Close our end of the pipe and reap the child so neither a
            # descriptor nor a zombie process is left behind.
            find.stdout.close()
            find.wait()
    finally:
        cursor.close()

def scan_all(DBH):
    """Scan every active base directory and update its scan timestamps.

    For each row of `base` with active = 1, runs scan() on it and then sets
    last_scanned to NOW(); first_scanned is set as well when it was still
    NULL.  DBH is an open database connection.
    """
    outer = DBH.cursor()
    inner = DBH.cursor()
    try:
        outer.execute("SELECT dir_id, first_scanned FROM base"
                      " WHERE active = 1")
        # fetchall() materialises the list up front, so the per-directory
        # work below does not interleave with an open result set.
        for dir_id, first_scanned in outer.fetchall():
            scan(DBH, dir_id)
            if first_scanned is None:
                inner.execute("UPDATE base"
                              " SET first_scanned = NOW(), last_scanned = NOW()"
                              " WHERE dir_id = %s",
                              (dir_id, ))
            else:
                inner.execute("UPDATE base"
                              " SET last_scanned = NOW()"
                              " WHERE dir_id = %s",
                              (dir_id, ))
    finally:
        # Close both cursors even when a scan or an update fails.
        inner.close()
        outer.close()

    

def main():
    """Connect to the 'isoonline' database and scan all active directories."""
    DBH = MySQLdb.connect(db='isoonline')
    try:
        scan_all(DBH)
    finally:
        # Always release the connection, including when a scan fails.
        DBH.close()

# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    main()