# Patch summary: derive SXML element names for tags Gumbo does not recognise.
#
# Makefile
#   Adds a `test_env` variable holding LD_LIBRARY_PATH, GUILE_LOAD_PATH and
#   GUILE_LOAD_COMPILED_PATH assignments, and makes the `check` recipe pass it
#   to `env`. LD_LIBRARY_PATH=$$PWD is new; the old recipe set only the two
#   GUILE_* variables.
#
# guile-gumbo.c
#   gumbo_element_to_sxml: when el->tag < GUMBO_TAG_UNKNOWN the tag symbol is
#   still built from gumbo_normalized_tagname(el->tag). Otherwise the name is
#   taken from a copy of el->original_tag passed through
#   gumbo_tag_from_original_text(), converted with scm_from_utf8_stringn(),
#   lower-cased with scm_string_downcase() and turned into a symbol with
#   scm_string_to_symbol().
#   NOTE(review): the added "// assert strp->length > 0 ???" comment marks an
#   unresolved question about an empty name; `strp` is used as a value
#   (strp.data, strp.length), so such a check would read `strp.length`.
#   scm_parse_children: the "SCM childvec =" line is removed and re-added with
#   the same visible text -- presumably a whitespace-only change; confirm
#   against the original patch.
#
# tests.scm
#   Adds three test-equal cases: "Unknown tag" (<unknown-tag/>),
#   "unknown tag upper case" (<UNKNOWN-tag/>, expecting unknown-tag) and
#   "unknown tag weird characters" (<unknown-TÄG/>, expecting unknown-täg).
#   Two further -/+ pairs carry no visible text -- presumably whitespace-only
#   line changes; confirm against the original patch.
#
# NOTE(review): in this copy the patch occupies two physical lines although the
# hunk headers describe multi-line hunks (e.g. -39,8 +39,10), so the original
# line breaks must be restored before the patch can be applied.
diff --git a/Makefile b/Makefile index 3671a7fb3be5cfe9156823736902862a5349f172..18802f1579f65fb06b20450b78b409eb7b544f9a 100644 --- a/Makefile +++ b/Makefile @@ -39,8 +39,10 @@ install: all cp -ra --no-preserve=ownership ccache/* $(DESTDIR)$(GUILE_CCACHE_DIR) install -Dt ${DESTDIR}/usr/lib/ libguile-gumbo.so +test_env=LD_LIBRARY_PATH=$$PWD GUILE_LOAD_PATH=$$PWD GUILE_LOAD_COMPILED_PATH=$$PWD/ccache + check: all - env GUILE_LOAD_PATH=$$PWD GUILE_LOAD_COMPILED_PATH=$$PWD/ccache guile -s tests.scm + env ${test_env} guile -s tests.scm clean: -rm *.o diff --git a/guile-gumbo.c b/guile-gumbo.c index c85d6838d6bd9d2ca41e1d1a17802a7e68dec7eb..caa87093dd9fc888b7abc6addeb3939e3a149bf9 100644 --- a/guile-gumbo.c +++ b/guile-gumbo.c @@ -56,7 +56,7 @@ SCM_DEFINE_PUBLIC (scm_parse_html, "parse-html", 2, 0, 0, #undef FUNC_NAME SCM scm_parse_children (GumboVector* /* GumboNode */ children, options* opts) { - SCM childvec = + SCM childvec = scm_c_make_vector ( children->length, SCM_UNSPECIFIED); @@ -70,8 +70,18 @@ SCM scm_parse_children (GumboVector* /* GumboNode */ children, options* opts) { } SCM gumbo_element_to_sxml (GumboElement* el, options* opts) { - SCM tag = scm_from_utf8_symbol - (gumbo_normalized_tagname(el->tag)); + SCM tag; + if (el->tag < GUMBO_TAG_UNKNOWN) { + tag = scm_from_utf8_symbol(gumbo_normalized_tagname(el->tag)); + } else { + /* Unknown tag */ + GumboStringPiece strp = el->original_tag; + gumbo_tag_from_original_text(&strp); + // assert strp->length > 0 ??? + SCM str = scm_from_utf8_stringn(strp.data, strp.length); + str = scm_string_downcase(str); + tag = scm_string_to_symbol(str); + } SCM scm_children = scm_parse_children (&el->children, opts); GumboVector* attributes = &el->attributes; diff --git a/tests.scm b/tests.scm index 5b07d1950a1a91108847e483ea53b0df093fb916..ec15ca2520252d0783fc1d4be31043a6fb56b483 100644 --- a/tests.scm +++ b/tests.scm @@ -19,7 +19,7 @@ '(*TOP* (html (head) (body (p) (p)))) (html->sxml "<p></p> <p></p>" #:trim-whitespace? 
#t)) - + (test-equal "Keep whitespace" '(*TOP* (html (head) (body (p) " " (p)))) (html->sxml "<p></p> <p></p>" @@ -29,12 +29,28 @@ '(*TOP* (doctype html) (html (head) (body "Hello"))) (html->sxml "<!doctype HTML>Hello" #:full-document? #t)) - + (test-equal "HTML only" '(html (head) (body "Hello")) (html->sxml "<!doctype HTML>Hello" #:full-document? #f)) +(test-equal "Unknown tag" + '(html (head) (body (unknown-tag))) + (html->sxml "<unknown-tag/>" + #:full-document? #f)) + +(test-equal "unknown tag upper case" + '(html (head) (body (unknown-tag))) + (html->sxml "<UNKNOWN-tag/>" + #:full-document? #f)) + +(test-equal "unknown tag weird characters" + '(html (head) (body (unknown-täg))) + (html->sxml "<unknown-TÄG/>" + #:full-document? #f)) + + (test-end "Gumbo HTML test") (exit (test-runner-fail-count (test-runner-current)))