Commit 4356ac92 authored by Niels Möller's avatar Niels Möller
Browse files

*** empty log message ***

Rev: src/make_char_classes:1.2
parent 2a60310a
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
;; a partition table index by character, and masks corresponding to ;; a partition table index by character, and masks corresponding to
;; the input classes. ;; the input classes.
;; Misc functions
(define (my-error s . args) (define (my-error s . args)
(error (apply format #f s args))) (error (apply format #f s args)))
...@@ -24,6 +25,11 @@ ...@@ -24,6 +25,11 @@
(cons (car l) (filter p (cdr l)))) (cons (car l) (filter p (cdr l))))
(else (filter p (cdr l))))) (else (filter p (cdr l)))))
(define (identity x) x)
(define (invert-predicate p)
(lambda (x) (not (p x))))
(define (reduce op start l) (define (reduce op start l)
(if (null? l) (if (null? l)
start start
...@@ -54,21 +60,34 @@ ...@@ -54,21 +60,34 @@
(else (else
(string-append (car list) separator (implode separator (cdr list)))))) (string-append (car list) separator (implode separator (cdr list))))))
(define-syntax when
(syntax-rules () ((when test . consequences) (if test (begin . consequences)))))
;; ;; A little more user-friendly input format.
(define (input->char-set o) (define (input->char-set o names)
(define (->ascii x) (define (->ascii x)
(if (integer? x) x (cond ((integer? x) x)
(char->ascii x))) ((char? x) (char->ascii x))
(else #f)))
(define (->char x)
(cond ((integer? x) (ascii->char x))
((char? x) x)
(else #f)))
(define (integer-or-char? x)
(or (integer? x) (char? x)))
(cond ((string? o) (cond ((string? o)
(string->char-set o)) (string->char-set o))
((symbol? o)
(cdr (assq o names)))
((and (pair? o) (not (pair? (cdr o))))
(ascii-range->char-set (->ascii (car o))
(+ 1 (->ascii (cdr o)))))
((list? o) ((list? o)
(reduce char-set-union (chars->char-set (reduce char-set-union (chars->char-set
(map ascii->char (filter integer? o))) (map ->char (filter integer-or-char? o)))
(map (lambda (pair) (map (lambda (o) (input->char-set o names))
(ascii-range->char-set (->ascii (car pair)) (filter (invert-predicate integer-or-char?) o))))
(+ 1 (->ascii (cdr pair)))))
(filter pair? o))))
(else (my-error "Bad char set specification ~s" o)))) (else (my-error "Bad char set specification ~s" o))))
;; Depends on charsets implemented as strings ;; Depends on charsets implemented as strings
...@@ -160,8 +179,7 @@ ...@@ -160,8 +179,7 @@
;; Destructivly intersect the leafs of two trees ;; Destructivly intersect the leafs of two trees
(define (tree-intersect! cache t1 t2) (define (tree-intersect! cache t1 t2)
(if (eq? t1 t2) (if (not (eq? t1 t2))
t1
(let* ((s1 (tree/set t1)) (let* ((s1 (tree/set t1))
(s2 (tree/set t2)) (s2 (tree/set t2))
(intersection (char-set-intersection s1 s2))) (intersection (char-set-intersection s1 s2)))
...@@ -195,9 +213,6 @@ ...@@ -195,9 +213,6 @@
(tree-intersect! cache t2 c)) (tree-intersect! cache t2 c))
t1)))))) t1))))))
(define-syntax when
(syntax-rules () ((when test . consequences) (if test (begin . consequences)))))
(define (partition! sets) (define (partition! sets)
(let ((cache (make-tree-cache sets))) (let ((cache (make-tree-cache sets)))
(let loop ((left sets)) (let loop ((left sets))
...@@ -231,21 +246,31 @@ ...@@ -231,21 +246,31 @@
leafs) leafs)
table)) table))
(define (build-mask root) (define (build-flags root)
(map-leafs tree/index root)) (map-leafs tree/index root))
(define (prepare-input l) (define (prepare-input l)
(map (lambda (pair) (let loop ((left l) (out '()) (names '()))
(cons (car pair) (make-tree (input->char-set (cdr pair))))) (if (null? left)
l)) out
(let* ((name (caar left))
(input (cdar left))
(set (input->char-set input
names)))
(werror "Read class ~a\n" name)
(loop (cdr left)
(cons (cons name
(make-tree set))
out)
(cons (cons name set) names))))))
#!
(define test-input (define test-input
'((alpha . ( (#\a . #\z) (#\A . #\Z) )) '((alpha . ( (#\a . #\z) (#\A . #\Z) ))
(digits . ( (#\0 . #\9))) (digits . ( (#\0 . #\9)))
(base64 . ( (#\a . #\z) (#\A . #\Z) (#\0 . #\9) #\+ #\/ #\=)) (base64 . ( (#\a . #\z) (#\A . #\Z) (#\0 . #\9) #\+ #\/ #\=))
(hex . ( (#\0 . #\9) (#\a . #\f) (#\A . #\F) )))) (hex . ( (#\0 . #\9) (#\a . #\f) (#\A . #\F) ))))
(define (test1) (define (test1)
(let* ((input (prepare-input test-input)) (let* ((input (prepare-input test-input))
(roots (map cdr input))) (roots (map cdr input)))
...@@ -253,10 +278,6 @@ ...@@ -253,10 +278,6 @@
(let ((leafs (index-leafs! roots))) (let ((leafs (index-leafs! roots)))
(werror "~s disjunct classes found." (length leafs)) (werror "~s disjunct classes found." (length leafs))
(build-char-table leafs)))) (build-char-table leafs))))
#!
(map (lambda (pair)
(cons (car pair) (tree-describe (cdr pair))))
input)))
!# !#
(define (bit->mask bit) (define (bit->mask bit)
...@@ -278,6 +299,29 @@ ...@@ -278,6 +299,29 @@
(for-each (lambda (class) (for-each (lambda (class)
(format #t "#define CHAR_~a (~a)\n" (format #t "#define CHAR_~a (~a)\n"
(car class) (car class)
(implode " | " (map bit->mask (build-mask (cdr class)))))) (implode " | " (map bit->mask (build-flags (cdr class))))))
classes) classes)
(format #t "#define CHAR_other 1\n"))) (format #t "#define CHAR_other 1\n")))
(define test-2-input
'((lower . "abcdefghijklmnopqrstuvwxyz")
(upper . "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
(alpha . (lower upper))
(digits . "0123456789")
(hex . (digits "abcdefABCDEF"))
;; base 64 digits, including the '=' pad character
(base64 . (alpha digits "+/="))
(control . ( (0 . #x1f) (#x80 . #x9f) #x7f))
;; SPC, TAB, LF, CR
(space . (#x20 #x9 #xa #xd))
;; \b \t \n \v \f \r
(escapable . (#x8 #x9 #xa #xb #xc #xd))
(punctuation . "-./_:*+=")
;; Characters defined by most iso-8859-1 character sets
(international . (#xa0 . #xff))))
(define (test-2)
(make-char-classes test-2-input))
(define (main . ignored)
(make-char-classes (read)))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment