Commit 4356ac92 authored by Niels Möller's avatar Niels Möller
Browse files

*** empty log message ***

Rev: src/make_char_classes:1.2
parent 2a60310a
......@@ -8,6 +8,7 @@
;; a partition table index by character, and masks corresponding to
;; the input classes.
;; Misc functions
(define (my-error s . args)
(error (apply format #f s args)))
......@@ -24,6 +25,11 @@
(cons (car l) (filter p (cdr l))))
(else (filter p (cdr l)))))
(define (identity x) x)
(define (invert-predicate p)
(lambda (x) (not (p x))))
(define (reduce op start l)
(if (null? l)
start
......@@ -54,21 +60,34 @@
(else
(string-append (car list) separator (implode separator (cdr list))))))
(define-syntax when
(syntax-rules () ((when test . consequences) (if test (begin . consequences)))))
;;
(define (input->char-set o)
;; A little more user-friendly input format.
(define (input->char-set o names)
(define (->ascii x)
(if (integer? x) x
(char->ascii x)))
(cond ((integer? x) x)
((char? x) (char->ascii x))
(else #f)))
(define (->char x)
(cond ((integer? x) (ascii->char x))
((char? x) x)
(else #f)))
(define (integer-or-char? x)
(or (integer? x) (char? x)))
(cond ((string? o)
(string->char-set o))
((symbol? o)
(cdr (assq o names)))
((and (pair? o) (not (pair? (cdr o))))
(ascii-range->char-set (->ascii (car o))
(+ 1 (->ascii (cdr o)))))
((list? o)
(reduce char-set-union (chars->char-set
(map ascii->char (filter integer? o)))
(map (lambda (pair)
(ascii-range->char-set (->ascii (car pair))
(+ 1 (->ascii (cdr pair)))))
(filter pair? o))))
(map ->char (filter integer-or-char? o)))
(map (lambda (o) (input->char-set o names))
(filter (invert-predicate integer-or-char?) o))))
(else (my-error "Bad char set specification ~s" o))))
;; Depends on charsets implemented as strings
......@@ -160,8 +179,7 @@
;; Destructivly intersect the leafs of two trees
(define (tree-intersect! cache t1 t2)
(if (eq? t1 t2)
t1
(if (not (eq? t1 t2))
(let* ((s1 (tree/set t1))
(s2 (tree/set t2))
(intersection (char-set-intersection s1 s2)))
......@@ -195,9 +213,6 @@
(tree-intersect! cache t2 c))
t1))))))
(define-syntax when
(syntax-rules () ((when test . consequences) (if test (begin . consequences)))))
(define (partition! sets)
(let ((cache (make-tree-cache sets)))
(let loop ((left sets))
......@@ -231,21 +246,31 @@
leafs)
table))
(define (build-mask root)
(define (build-flags root)
(map-leafs tree/index root))
(define (prepare-input l)
(map (lambda (pair)
(cons (car pair) (make-tree (input->char-set (cdr pair)))))
l))
(let loop ((left l) (out '()) (names '()))
(if (null? left)
out
(let* ((name (caar left))
(input (cdar left))
(set (input->char-set input
names)))
(werror "Read class ~a\n" name)
(loop (cdr left)
(cons (cons name
(make-tree set))
out)
(cons (cons name set) names))))))
#!
(define test-input
'((alpha . ( (#\a . #\z) (#\A . #\Z) ))
'((alpha . ( (#\a . #\z) (#\A . #\Z) ))
(digits . ( (#\0 . #\9)))
(base64 . ( (#\a . #\z) (#\A . #\Z) (#\0 . #\9) #\+ #\/ #\=))
(hex . ( (#\0 . #\9) (#\a . #\f) (#\A . #\F) ))))
(define (test1)
(let* ((input (prepare-input test-input))
(roots (map cdr input)))
......@@ -253,10 +278,6 @@
(let ((leafs (index-leafs! roots)))
(werror "~s disjunct classes found." (length leafs))
(build-char-table leafs))))
#!
(map (lambda (pair)
(cons (car pair) (tree-describe (cdr pair))))
input)))
!#
(define (bit->mask bit)
......@@ -278,6 +299,29 @@
(for-each (lambda (class)
(format #t "#define CHAR_~a (~a)\n"
(car class)
(implode " | " (map bit->mask (build-mask (cdr class))))))
(implode " | " (map bit->mask (build-flags (cdr class))))))
classes)
(format #t "#define CHAR_other 1\n")))
(define test-2-input
'((lower . "abcdefghijklmnopqrstuvwxyz")
(upper . "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
(alpha . (lower upper))
(digits . "0123456789")
(hex . (digits "abcdefABCDEF"))
;; base 64 digits, including the '=' pad character
(base64 . (alpha digits "+/="))
(control . ( (0 . #x1f) (#x80 . #x9f) #x7f))
;; SPC, TAB, LF, CR
(space . (#x20 #x9 #xa #xd))
;; \b \t \n \v \f \r
(escapable . (#x8 #x9 #xa #xb #xc #xd))
(punctuation . "-./_:*+=")
;; Characters defined by most iso-8859-1 character sets
(international . (#xa0 . #xff))))
(define (test-2)
(make-char-classes test-2-input))
(define (main . ignored)
(make-char-classes (read)))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment