Skip to content

Commit

Permalink
whitespace normalizer
Browse files Browse the repository at this point in the history
  • Loading branch information
dlichteblau committed Dec 29, 2005
1 parent 4df5d1d commit b0615af
Show file tree
Hide file tree
Showing 14 changed files with 135 additions and 11 deletions.
3 changes: 2 additions & 1 deletion README.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<li>
<ul class="hack">
<li>
<a href="doc/using.html#quickstart"><b>Quick-Start Example</b></a>
<a href="doc/quickstart.html"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
Expand Down Expand Up @@ -112,6 +112,7 @@ <h2>Recent Changes</h2>
<li>UTF-8 string support in DOM on Lisps without Unicode characters.</li>
<li>Sink API has been changed.</li>
<li>Support internal subset serialization.</li>
<li>Whitespace normalizer.</li>
<li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li>
<li>Use trivial-gray-streams.</li>
</ul>
Expand Down
1 change: 1 addition & 0 deletions cxml.asd
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
(:file "xmls-compat" :depends-on ("xml-parse"))
(:file "recoder" :depends-on ("xml-parse"))
(:file "xmlns-normalizer" :depends-on ("xml-parse"))
(:file "space-normalizer" :depends-on ("xml-parse"))
(:file "catalog" :depends-on ("xml-parse"))
(:file "sax-proxy" :depends-on ("xml-parse")))
:depends-on (:cxml-runes :puri :trivial-gray-streams))
Expand Down
2 changes: 1 addition & 1 deletion doc/dom.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
<a href="quickstart.html"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
Expand Down
4 changes: 2 additions & 2 deletions doc/installation.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
<a href="quickstart.html"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
Expand Down Expand Up @@ -116,7 +116,7 @@ <h2>Compilation</h2>
<pre>* (asdf:operate 'asdf:load-op :cxml)</pre>

<p>
You can then try the <a href="using.html#quickstart">quick-start example</a>.
You can then try the <a href="quickstart.html">quick-start example</a>.
</p>

<a name="tests"/>
Expand Down
2 changes: 1 addition & 1 deletion doc/quickstart.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
<a href="quickstart.html"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
Expand Down
31 changes: 30 additions & 1 deletion doc/using.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
<a href="quickstart.html"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
Expand Down Expand Up @@ -388,6 +388,32 @@ <h3>Miscellaneous SAX handlers</h3>
<tt>start-element</tt> events before passing them on the next
handler.
</p>
<p>
<div class="def">Function CXML:MAKE-WHITESPACE-NORMALIZER (chained-handler &optional dtd)</div>
Return a SAX handler which removes whitespace from elements that
have <em>element content</em> and have not been declared to
preserve space using an xml:space attribute.
</p>
<p>Example:</p>
<pre>(cxml:parse-file "example.xml"
(cxml:make-whitespace-normalizer (cxml-dom:make-dom-builder))
:validate t)</pre>
<p>Example input:</p>
<pre>&lt;!DOCTYPE test [
&lt;!ELEMENT test (foo,bar*)>
&lt;!ATTLIST test a CDATA #IMPLIED>
&lt;!ELEMENT foo #PCDATA>
&lt;!ELEMENT bar (foo?)>
&lt;!ATTLIST bar xml:space (default|preserve) "default">
]>
&lt;test a='b'>
&lt;foo> &lt;/foo>
&lt;bar> &lt;/bar>
&lt;bar xml:space="preserve"> &lt;/bar>
&lt;/test>
</pre>
<p>Example result:</p>
<pre>&lt;test a="b">&lt;foo> &lt;/foo>&lt;bar>&lt;/bar>&lt;bar xml:space="preserve"> &lt;/bar>&lt;/test></pre>

<a name="rods"/>
<h3>Recoders</h3>
Expand Down Expand Up @@ -572,6 +598,9 @@ <h2>SAX Interface</h2>
<div class="def">Accessor SAX:ATTRIBUTE-QNAME (attribute)</div>
<div class="def">Accessor SAX:ATTRIBUTE-SPECIFIED-P (attribute)</div>
<div class="def">Accessor SAX:ATTRIBUTE-VALUE (attribute)</div>
<br/>
<div class="def">Function SAX:FIND-ATTRIBUTE (qname attributes)</div>
<div class="def">Function SAX:FIND-ATTRIBUTE-NS (uri lname attributes)</div>
</p>
<p>
The entity declaration methods are similar to Java SAX
Expand Down
2 changes: 1 addition & 1 deletion doc/xmls-compat.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
<a href="quickstart.html"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
Expand Down
1 change: 1 addition & 0 deletions runes/ystream.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@
#+rune-is-integer
(progn
(defstruct (character-stream-ystream/utf8
(:constructor make-character-stream-ystream/utf8 (os-stream))
(:include %stream-ystream)
(:conc-name "YSTREAM-")))

Expand Down
1 change: 1 addition & 0 deletions xml/package.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,6 @@
#:sax-proxy
#:proxy-chained-handler
#:make-namespace-normalizer
#:make-whitespace-normalizer
#:rod-to-utf8-string
#:utf8-string-to-rod))
6 changes: 5 additions & 1 deletion xml/recoder.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,8 @@

(defwrapper sax:entity-resolver
(resolver)
resolver))
resolver)

(defwrapper sax::dtd
(dtd)
dtd))
25 changes: 24 additions & 1 deletion xml/sax-handler.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
#:*use-xmlns-namespace*

#:make-attribute
#:find-attribute
#:find-attribute-ns
#:attribute-namespace-uri
#:attribute-local-name
#:attribute-qname
Expand Down Expand Up @@ -137,6 +139,23 @@ Setting this variable has no effect unless both
value
specified-p)

(defun %rod= (x y)
;; allow rods *and* strings *and* null
(cond
((zerop (length x)) (zerop (length y)))
((zerop (length y)) nil)
((stringp x) (string= x y))
(t (runes:rod= x y))))

(defun find-attribute (qname attrs)
(find qname attrs :key #'attribute-qname :test #'%rod=))

(defun find-attribute-ns (uri lname attrs)
(find-if (lambda (attr)
(and (%rod= uri (sax:attribute-namespace-uri attr))
(%rod= lname (sax:attribute-local-name attr))))
attrs))

(defgeneric start-document (handler)
(:documentation "Called at the beginning of the parsing process,
before any element, processing instruction or comment is reported.
Expand Down Expand Up @@ -325,7 +344,11 @@ finished, if present.")
(:documentation
"Called between sax:end-dtd and sax:end-document to register an entity
resolver, a function of two arguments: An entity name and SAX handler.
When called, the resolver function will parse the named entities data.")
When called, the resolver function will parse the named entity's data.")
(:method ((handler t) resolver)
(declare (ignore resolver))
nil))

;; internal for now
(defgeneric dtd (handler dtd)
(:method ((handler t) dtd) (declare (ignore dtd)) nil))
3 changes: 2 additions & 1 deletion xml/sax-proxy.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@
(define-proxy-method sax:notation-declaration (name public-id system-id))
(define-proxy-method sax:element-declaration (name model))
(define-proxy-method sax:attribute-declaration (elt attr type default))
(define-proxy-method sax:entity-resolver (resolver)))
(define-proxy-method sax:entity-resolver (resolver))
(define-proxy-method sax::dtd (dtd)))
62 changes: 62 additions & 0 deletions xml/space-normalizer.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
;;;; space-normalizer.lisp -- whitespace removal
;;;;
;;;; This file is part of the CXML parser, released under Lisp-LGPL.
;;;; See file COPYING for details.
;;;;
;;;; Copyright (c) 2005 David Lichteblau

(in-package :cxml)

(defclass whitespace-normalizer (sax-proxy)
((attributes :initform '(t) :accessor xml-space-attributes)
(models :initform nil :accessor xml-space-models)
(dtd :initarg :dtd :accessor xml-space-dtd)))

(defun make-whitespace-normalizer (chained-handler &optional dtd)
(make-instance 'whitespace-normalizer
:dtd dtd
:chained-handler chained-handler))

(defmethod sax::dtd ((handler whitespace-normalizer) dtd)
(unless (xml-space-dtd handler)
(setf (xml-space-dtd handler) dtd)))

(defmethod sax:start-element
((handler whitespace-normalizer) uri lname qname attrs)
(declare (ignore uri lname))
(let ((dtd (xml-space-dtd handler)))
(when dtd
(let ((xml-space
(sax:find-attribute (if (stringp qname) "xml:space" #"xml:space")
attrs)))
(push (print(if xml-space
(rod= (rod (sax:attribute-value xml-space)) #"default")
(car (xml-space-attributes handler))))
(xml-space-attributes handler)))
(let* ((e (cxml::find-element (rod qname) dtd))
(cspec (when e (cxml::elmdef-content e))))
(push (and (consp cspec)
(not (and (eq (car cspec) '*)
(let ((subspec (second cspec)))
(and (eq (car subspec) 'or)
(eq (cadr subspec) :PCDATA))))))
(xml-space-models handler)))))
(call-next-method))

(defmethod sax:characters ((handler whitespace-normalizer) data)
(cond
((and (xml-space-dtd handler)
(car (xml-space-attributes handler))
(car (xml-space-models handler)))
(unless (every #'white-space-rune-p (rod data))
(warn "non-whitespace character data in element content")
(call-next-method)))
(t
(call-next-method))))

(defmethod sax:end-element ((handler whitespace-normalizer) uri lname qname)
(declare (ignore uri lname qname))
(when (xml-space-dtd handler)
(pop (xml-space-attributes handler))
(pop (xml-space-models handler)))
(call-next-method))
3 changes: 2 additions & 1 deletion xml/xml-parse.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -2513,7 +2513,8 @@
(let ((dtd (dtd *ctx*)))
(sax:entity-resolver
(handler *ctx*)
(lambda (name handler) (resolve-entity name handler dtd))))
(lambda (name handler) (resolve-entity name handler dtd)))
(sax::dtd (handler *ctx*) dtd))
(list :DOCTYPE name extid))))

(defun report-cached-dtd (dtd)
Expand Down

0 comments on commit b0615af

Please sign in to comment.