forked from mikeizbicki/html_validator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHTML_Validator.py
70 lines (60 loc) · 1.89 KB
/
HTML_Validator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/python3
import re
def validate_html(html):
    '''
    Perform a limited form of html validation: check that every
    opening tag has a matching closing tag and that tags are
    closed in the reverse of the order they were opened.

    Only tag names are compared; any attributes that follow the
    name inside an opening tag are ignored.  Text outside of angle
    brackets is ignored as well.

    Returns True for the empty string.  Returns False for any
    non-empty input in which no complete `<...>` tag is found
    (for example plain text, or a dangling `<strong`).
    Self-closing syntax such as `<br/>` is not treated specially;
    it counts as an opening tag that is never closed.

    >>> validate_html('<strong>example</strong>')
    True
    >>> validate_html('<strong>example')
    False
    >>> validate_html('<a href="https://example.com">link</a>')
    True
    >>> validate_html('<b><i>crossed</b></i>')
    False
    '''
    # Extract first so that non-string input fails here, exactly
    # as it always has, before any of the shortcuts below.
    tags = _extract_tags(html)
    if not html:
        return True
    if not tags:
        return False

    # Balanced-parentheses algorithm: a stack of the names of the
    # tags that are currently open.
    open_names = []
    for tag in tags:
        # A closing tag is identified by its `</` prefix.  Testing
        # for a '/' anywhere in the tag would misclassify opening
        # tags whose attributes contain a slash, such as URLs.
        is_closing = tag.startswith('</')
        inner = tag[2:-1] if is_closing else tag[1:-1]

        # The name is the first whitespace-delimited token; the
        # rest (attributes, trailing spaces) does not take part in
        # matching.  A whitespace-only interior yields an empty name.
        tokens = inner.split()
        name = tokens[0] if tokens else ''

        if not is_closing:
            open_names.append(name)
        elif not open_names or open_names.pop() != name:
            # Either nothing is open, or the most recently opened
            # tag is not the one being closed.
            return False

    # Anything still on the stack was opened but never closed.
    return not open_names
def _extract_tags(html):
'''
This is a helper function for `validate_html`.
By convention in Python, helper functions that are
not meant to be used directly by the
user are prefixed with an underscore.
This function returns a list of all
the html tags contained in the input string,
stripping out all text not contained within angle brackets.
>>> _extract_tags('Python <strong>rocks</strong>!')
['<strong>', '</strong>']
'''
tags = re.findall(r'<[^>]+>', html)
return tags