Skip to content
Snippets Groups Projects
Commit f7ff2b07 authored by Per Cederqvist's avatar Per Cederqvist
Browse files

(scan): Performance fix: don't do a select per file we examine.

	Insert all of the files into a temporary table instead, and do
a left join to find out which of them are new.
(insert_tmp_files): New function.
parent 97ec7493
No related branches found
No related tags found
No related merge requests found
......@@ -52,6 +52,7 @@ class xreadchunks:
def scan(DBH, dir_id):
cursor = DBH.cursor()
inserter = DBH.cursor()
cursor.execute("SELECT dir_name FROM base"
" WHERE dir_id = %d" % (dir_id, ))
......@@ -61,34 +62,59 @@ def scan(DBH, dir_id):
"-printf '%TY-%Tm-%Td %TT %s %P\\0'",
"r")
cursor.execute("CREATE TEMPORARY TABLE cur_files ("
" filename varchar(255) not null,"
" mtime datetime not null,"
" size bigint not null,"
" dir_id bigint not null);")
vals = []
for line in xreadchunks(find, "\0"):
mtime = line[0:19]
[filesize, filename] = line[20:].split(" ", 2)
cursor.execute("LOCK TABLES file WRITE")
cursor.execute("SELECT count(*) FROM file"
" WHERE filename = %s"
" AND dir_id = %s"
" AND mtime = %s"
" AND size = %s",
(filename, dir_id, mtime, filesize))
count = cursor.fetchone()[0]
if count == 0:
h = file_hash(os.path.join(dir_name, filename))
cursor.execute("INSERT INTO file"
" (filename, dir_id, mtime, size,"
" md5sum, sha1sum, verified, broken)"
" VALUES"
" (%s, %s, %s, %s,"
" %s, %s, NOW(), %s)",
(filename, dir_id, mtime, filesize,
h.md5, h.sha1, 0))
cursor.execute("UNLOCK TABLES")
vals.append((filename, mtime, filesize, dir_id))
if len(vals) > 100:
insert_tmp_files(cursor, vals)
vals = []
insert_tmp_files(cursor, vals)
cursor.execute("LOCK TABLES file WRITE, base READ")
cursor.execute("SELECT cur_files.filename, cur_files.size,"
" cur_files.mtime"
" FROM cur_files"
" LEFT JOIN file"
" USING (filename, mtime, size, dir_id)"
" WHERE file.filename IS NULL")
while 1:
res = cursor.fetchmany()
if len(res) == 0:
break
for [filename, filesize, mtime] in res:
fn = os.path.join(dir_name, filename)
h = file_hash(fn)
inserter.execute("INSERT INTO file"
" (filename, dir_id, mtime, size,"
" md5sum, sha1sum, verified, broken)"
" VALUES"
" (%s, %s, %s, %s,"
" %s, %s, NOW(), %s)",
(filename, dir_id, mtime, filesize,
h.md5, h.sha1, 0))
cursor.execute("UNLOCK TABLES")
cursor.execute("DROP TABLE cur_files")
inserter.close()
cursor.close()
def insert_tmp_files(cursor, vals):
    """Bulk-insert a batch of scanned file records into cur_files.

    cursor -- DB-API cursor on which the insert is executed.
    vals   -- sequence of (filename, mtime, size, dir_id) tuples, one
              per row, in the column order used by the INSERT below.

    Returns None.  An empty batch is a no-op: no statement is sent.
    """
    # scan() always makes one final flush call after its read loop, so
    # the batch may be empty.  DB-API does not specify what executemany
    # does with an empty sequence, so skip the call instead of relying
    # on driver-specific behaviour.
    if not vals:
        return
    cursor.executemany("INSERT INTO cur_files (filename, mtime, size, dir_id)"
                       " VALUES (%s, %s, %s, %s)",
                       vals)
def scan_all(DBH):
outer = DBH.cursor()
inner = DBH.cursor()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment