diff --git a/mediawikifs.scm b/mediawikifs.scm index 6f30b9fb5b98d58adb9eeba56831bae106c81186..c081981539784c08a09e50c0b8b9279c835e659c 100644 --- a/mediawikifs.scm +++ b/mediawikifs.scm @@ -46,20 +46,6 @@ (set! (form-urlencoded-separator) "&;") -(define base-uri - (make-uri - scheme: 'https - host: "datorhandbok.lysator.liu.se" - path: '(/ "api.php") - query: '((action . "query") - (export . #t) - (exportnowrap . #t)))) - - -(define (uri-for-page page) - (update-uri base-uri - query: (cons `(titles . ,page) - (uri-query base-uri)))) (define (get-xml-page url) @@ -67,54 +53,151 @@ url #f (lambda (port) (ssax:xml->sxml port '((mw . "http://www.mediawiki.org/xml/export-0.10/")))))) + + + + + + +(define base-uri + (make-uri + scheme: 'https + host: "datorhandbok.lysator.liu.se" + path: '(/ "api.php"))) + +(define (category-uri #!optional continue-from) + (update-uri + base-uri + query: `((action . query) + (list . allcategories) + (format . xml) + ,@(if continue-from + `((accontinue . ,continue-from)) + '())))) + + +(define (all-categories #!optional continue-from) + (let* ((page _ _ (get-xml-page (category-uri continue-from)))) + (append (map sxml:text ((sxpath '(// allcategories c)) page)) + (let ((continue ((if-sxpath '(// @ accontinue)) page))) + (if continue + (all-categories (sxml:text continue)) + '()))))) + +(define (category-members-uri category-name #!optional continue-from) + (update-uri + base-uri + query: `((action . query) + (list . categorymembers) + (cmtitle . ,(string-append "Category:" category-name)) + (format . xml) + ,@(if continue-from + `((cmcontinue . ,continue-from)) + '())))) + +(define (all-pages-in-category category-name #!optional continue-from) + (let* ((page _ _ (get-xml-page (category-members-uri + category-name continue-from)))) + (append (map sxml:text ((sxpath '(// categorymembers cm @ title)) page)) + (let ((continue ((if-sxpath '(// @ cmcontinue)) page))) + (if continue + (all-pages-in-category category-name (sxml:text continue)) + '()))))) + + +(define (uri-for-page page) + (update-uri base-uri + query: `((action . query) + (export . #t) + (exportnowrap . #t) + (titles . ,page)))) + (define (get-page ht pagename) (unless (hash-table-exists? ht pagename) - (let* ((sxml request response (get-xml-page (uri-for-page pagename)))) + (let* ((sxml _ _ (get-xml-page (uri-for-page pagename)))) (when ((if-sxpath '(// mw:page)) sxml) (set! (hash-table-ref ht pagename) sxml)))) (hash-table-ref/default ht pagename #f)) +(define categories (alist->hash-table + (map (cut cons <> 'not-downloaded) + (all-categories)))) + +(define (category-page-names category) + (case (hash-table-ref/default categories category 'no-such-page) + ((no-such-page) '()) + ((not-downloaded) + (let ((pages (all-pages-in-category category))) + (set! (hash-table-ref categories category) pages) + pages)) + (else => identity))) + +(define pages (make-hash-table)) + +(define (page-id page) + (let ((id-str (sxml:text ((sxpath '(// mw:page mw:id)) page)))) + (read (open-input-string id-str)))) + +(define (page-length page) + (let ((length-str (sxml:text ((sxpath '(// mw:page // mw:text @ bytes)) page)))) + (read (open-input-string length-str)))) + -(define ht (make-hash-table)) +#| + | / + | /<category> + | /<pages> + |# (define mediawiki-fs (make-filesystem init: (lambda () (print "Filesystem ready")) - readdir: (lambda (path) - (and (string=? path "/") - (cons* "." ".." (map ->string (hash-table-keys ht))))) - - getattr: (lambda (path) - (cond - ((string=? path "/") - (vector (bitwise-ior file/dir #o555) - 2 - (current-user-id) - (current-group-id) - (hash-table-size ht) - 0 0 0)) - (else - (let* ((page-path (car (string-split path "/"))) - (page (get-page ht page-path))) - (if page - (let* ((id-str (sxml:text ((sxpath '(// mw:page mw:id)) page))) - (length-str (sxml:text ((sxpath '(// mw:page // mw:text @ bytes)) page))) - (id (read (open-input-string id-str))) - (length (read (open-input-string length-str)))) - (vector (bitwise-ior file/reg #o444) - 1 + readdir: (lambda (path-str) + (let ((path (string-split path-str "/"))) + (cons* "." ".." + (map ->string + (cond ((null? path) (hash-table-keys categories)) + ((= 1 (length path)) (category-page-names (car path))) + (else '())))))) + + getattr: (lambda (path-str) + (print path-str) + (let ((path (string-split path-str "/"))) + (cond + ((null? path) + (vector (bitwise-ior file/dir #o555) + 2 + (current-user-id) + (current-group-id) + (hash-table-size categories) + 0 0 0)) + ((= 1 (length path)) + (let ((sub-pages (category-page-names (car path)))) + (if (null? sub-pages) + (vector (bitwise-ior file/dir #o555) + 2 0 0 0 0 0 0) + (vector (bitwise-ior file/dir #o555) + 2 (current-user-id) - id length - 0 0 0)) - #f))))) - - open: (lambda (path mode) - (cond ((string=? path "/") #f) - (else (let ((pagename (car (string-split path "/")))) - (get-page ht pagename))))) + (current-group-id) + (length sub-pages) + 0 0 0)))) + (else + (let* ((page (get-page pages (cadr path)))) + (and page + (vector (bitwise-ior file/reg #o444) + 1 ; TODO change with how many categories it belongs to + (current-user-id) (page-id page) + (page-length page) + 0 0 0))))))) + + open: (lambda (path-str mode) + (let ((path (string-split path-str "/"))) + (cond ((> 2 (length path)) #f) + (else (get-page pages (cadr path)))))) read: (lambda (handle size offset) (sxml:text