-
Notifications
You must be signed in to change notification settings - Fork 11
HTML parsing jewel
usablebydesign edited this page Sep 28, 2010
·
3 revisions
package haxe.text.html;
#if js
using Lambda;
import haxe.hDom.Dom;
#end
/**
 * Splits an HTML string into its markup and its <script> elements so that the
 * scripts can be re-created as real DOM nodes instead of being inserted
 * (inertly) through innerHTML.
 */
class HTMLParser {
#if js
    /**
     * Parses `s` into DOM nodes.
     *
     * The non-script markup is materialised through the innerHTML of a detached
     * <div>; every <script> fragment found by `parse` is rebuilt with
     * `createElement ("script")`, copying its quoted attributes and its code.
     *
     * @param s  HTML source.
     * @return   The top-level nodes of the non-script markup, followed by one
     *           script node per <script> fragment (in document order).
     */
    public static function parseIntoElements (s: String): Array<Node> {
        var d = Lib.document,
            container: HTMLElement = cast d.createElement ("div"),
            convert_script = function (node_as_text: String): Node {
                var new_script_node = d.createElement ("script"),
                    // Tag detection in `parse` is case-insensitive, so these are too.
                    open_tag = ~/^<script\s+([^>]+)>/i,
                    // Group 2 is the opening quote; the value is matched lazily up to the
                    // SAME quote (\2) so that several attributes are not merged into one.
                    attribute = ~/([\w:-]+)=(["'])([\s\S]*?)\2/,
                    // [\s\S] instead of "." so that multi-line script bodies are captured.
                    whole_script = ~/^<script[^>]*>([\s\S]*)<\/script>/i;

                if (open_tag.match (node_as_text))
                    // customReplace is only used to visit every attribute match.
                    attribute.customReplace (open_tag.matched (1), function (matches) {
                        new_script_node.setAttribute (matches.matched (1), matches.matched (3));
                        return "";
                    });

                if (whole_script.match (node_as_text)) {
                    var code = whole_script.matched (1),
                        can_have_children = untyped (new_script_node.canHaveChildren);
                    // When canHaveChildren is defined and false the code is assigned via
                    // the `text` property instead of a child text node.
                    if (can_have_children == null || can_have_children)
                        new_script_node.appendChild (d.createTextNode (code));
                    else
                        untyped (new_script_node.text = code);
                }
                return new_script_node;
            },
            parsed = parse (s);

        container.innerHTML = parsed[0];

        var nodes: Array<Node> = [];
        for (i in 0...container.childNodes.length) nodes.push (container.childNodes[i]);
        for (fragment in parsed.slice (1)) nodes.push (convert_script (fragment));
        return nodes;
    }
#end

    /**
     * Separates the <script> elements of `s` from the rest of the markup.
     *
     * "<script" inside HTML comments or CDATA sections is ignored. Inside a
     * script, "</script>" is ignored when it occurs in a quoted string, a line
     * comment, a block comment or a CDATA section.
     *
     * An unterminated script (no "</script>", or an opening tag without ">")
     * extends to the end of the input.
     *
     * NOTE: positions found in the lower-cased copy are applied to `s`; this
     * assumes toLowerCase () does not change the string length.
     *
     * @param s  HTML source.
     * @return   Element 0 is `s` with all script elements removed; elements
     *           1..n are the script elements (opening tag through closing tag)
     *           in document order.
     */
    public static function parse (s: String): Array<String> {
        var length = s.length;
        // Nothing to scan.
        if (length == 0) return [""];

        var lowercase = s.toLowerCase ();

        var min = function (x: Int, y: Int): Int {return x < y ? x : y;};

        // Index of `search` in `text` at or after `mark`, or text.length when absent.
        // `previous` is the result of the last search for the same token: while it
        // still lies beyond `mark` it is reused, so the text is not rescanned.
        var next = function (text: String, search: String, mark: Int, previous: Int): Int {
            if (previous > mark) return previous;
            var found = text.indexOf (search, mark);
            return found == -1 ? text.length : found;
        };

        // `start` is the opening quote; returns the index of the matching closing
        // quote (skipping backslash-escaped characters) or text.length.
        var end_of_string = function (start: Int, text: String): Int {
            var quote = text.charAt (start), mark = start + 1, quote_mark = start, back_mark = start;
            while ((quote_mark = next (text, quote, mark, quote_mark)) > (back_mark = next (text, "\\", mark, back_mark)))
                mark = back_mark + 2;
            return quote_mark;
        };

        // Index of the newline ending a "//" comment, or text.length.
        var end_of_line_comment = function (start: Int, text: String): Int {
            return next (text, "\n", start, start);
        };

        // Index of the LAST character of the closing "*/" (or beyond the text when
        // unterminated), so that resuming one past it never re-reads the "/" of the
        // terminator as the start of a new "//" or "/*".
        var end_of_block_comment = function (start: Int, text: String): Int {
            return next (text, "*/", start + 2, start + 2) + 1;
        };

        // Index of the "]]>" closing a CDATA section, or text.length.
        var end_of_cdata = function (start: Int, text: String): Int {
            return next (text, "]]>", start, start);
        };

        // Index of the "-->" closing an HTML comment, or text.length.
        var end_of_comment = function (start: Int, text: String): Int {
            return next (text, "-->", start + 4, start + 4);
        };

        // `start` is the ">" of the opening tag; returns the index just past the
        // matching "</script>" (text.length + 9 when there is none).
        var end_of_script = function (start: Int, text: String, lower: String): Int {
            var mark = start, close_script_mark = start,
                single_string_mark = start, double_string_mark = start,
                cdata_mark = start, line_comment_mark = start, block_comment_mark = start;
            while (true) {
                close_script_mark = next (lower, "</script>", mark, close_script_mark);
                if (close_script_mark >= text.length) break; // unterminated script

                single_string_mark = next (text, "'", mark, single_string_mark);
                double_string_mark = next (text, "\"", mark, double_string_mark);
                cdata_mark = next (text, "<![CDATA[", mark, cdata_mark);
                line_comment_mark = next (text, "//", mark, line_comment_mark);
                block_comment_mark = next (text, "/*", mark, block_comment_mark);

                // Earliest construct that could hide a "</script>".
                var token = min (min (min (single_string_mark, double_string_mark), cdata_mark),
                                 min (line_comment_mark, block_comment_mark));
                if (close_script_mark <= token) break; // the closing tag comes first

                var construct_end =
                    (token == single_string_mark || token == double_string_mark) ? end_of_string (token, text) :
                    (token == line_comment_mark) ? end_of_line_comment (token, text) :
                    (token == block_comment_mark) ? end_of_block_comment (token, text) :
                    end_of_cdata (token, text);
                mark = construct_end + 1;
            }
            return close_script_mark + 9; // 9 == "</script>".length
        };

        // Index of the next "<script" at or after `start` that is not inside an
        // HTML comment or CDATA section, or the text length when there is none.
        var next_script = function (start: Int, text: String, lower: String): Int {
            var mark = start, script_mark = start, cdata_mark = start, comment_mark = start;
            while (true) {
                script_mark = next (lower, "<script", mark, script_mark);
                if (script_mark >= text.length) break; // no further script

                cdata_mark = next (text, "<![CDATA[", mark, cdata_mark);
                comment_mark = next (text, "<!--", mark, comment_mark);

                var token = min (cdata_mark, comment_mark);
                if (script_mark <= token) break; // the script comes first

                mark = (token == cdata_mark) ? end_of_cdata (token, text) : end_of_comment (token, text);
            }
            return script_mark;
        };

        var scripts: Array<String> = [],
            rest: Array<String> = [],
            mark = 0,
            last_mark = 0;

        while ((mark = next_script (mark, s, lowercase)) < length) {
            rest.push (s.substr (last_mark, mark - last_mark));

            // An opening tag that is never closed with ">" runs to the end of the
            // input; searching from -1 could otherwise match an earlier "</script>"
            // and stall the loop.
            var tag_end = s.indexOf (">", mark),
                script_end = (tag_end == -1) ? length : min (end_of_script (tag_end, s, lowercase), length);

            scripts.push (s.substr (mark, script_end - mark));
            mark = last_mark = script_end;
        }
        rest.push (s.substr (last_mark));

        return [rest.join ("")].concat (scripts);
    }
}