-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsanitize_user_input.py
39 lines (32 loc) · 1.38 KB
/
sanitize_user_input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
"""
Sanitize user-supplied HTML input.
"""
import re

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:  # Python 2 fallback
    from urlparse import urljoin

from bs4 import BeautifulSoup, Comment
def sanitize_html(value, base_url=None):
    """Return *value* reduced to a small whitelist of HTML tags and attributes.

    Tags outside the whitelist are dropped but their children are kept;
    ``<script>`` and ``<style>`` elements are removed together with their
    content.  Only ``href``, ``src``, ``width`` and ``height`` attributes
    survive, ``javascript:`` / ``vbscript:`` schemes are stripped from their
    values, and ``href`` / ``src`` are resolved against *base_url*.

    Args:
        value: HTML fragment supplied by the user.  ``None`` or an empty
            string yields an empty result.
        base_url: Optional base used to turn relative ``href`` / ``src``
            values into absolute URLs.  When falsy, values are left as-is.

    Returns:
        The sanitized markup as a text string.
    """
    # Local import: the module header only brings in BeautifulSoup/Comment.
    from bs4.element import PreformattedString

    if not value:
        return ''

    value = value.strip('\n\r')

    # Match the scheme even when its letters are separated by whitespace or
    # by hex character references.
    filler = r'[\s]*(&#x.{1,7})?'
    rjs = filler.join('javascript:')
    rvb = filler.join('vbscript:')
    re_scripts = re.compile('(%s)|(%s)' % (rjs, rvb), re.IGNORECASE)

    valid_tags = frozenset('p i strong b u a h1 h2 h3 pre br img'.split())
    valid_attrs = frozenset('href src width height'.split())
    url_attrs = frozenset('href src'.split())  # attributes holding a URL
    # bs4 writes the text inside these elements without entity escaping, so
    # merely hiding the tag would let raw markup through.
    raw_text_tags = frozenset(('script', 'style'))

    # Name the parser explicitly so the result does not depend on which
    # optional parser happens to be installed (lxml would wrap the fragment
    # in <html><body>...).
    soup = BeautifulSoup(value, 'html.parser')

    # Comments, CDATA sections, doctypes, declarations and processing
    # instructions are all rendered verbatim by bs4; remove every one of
    # them.  Collect first, since extracting mutates the tree being walked.
    verbatim_nodes = [node for node in soup.descendants
                      if isinstance(node, PreformattedString)]
    for node in verbatim_nodes:
        node.extract()

    for tag in soup.find_all(True):
        if tag.name in raw_text_tags:
            tag.extract()
            continue
        if tag.name not in valid_tags:
            # Render the children but not the tag itself.
            tag.hidden = True

        # bs4 stores attributes as a dict (BeautifulSoup 3 used a list of
        # pairs), so rebuild a filtered dict rather than a list.
        cleaned = {}
        for attr, val in tag.attrs.items():
            if attr not in valid_attrs:
                continue
            if isinstance(val, (list, tuple)):
                # Multi-valued attributes arrive as lists; flatten defensively.
                val = ' '.join(val)
            # Strip until stable: a single pass over
            # "javajavascript:script:x" would leave "javascript:x" behind.
            previous = None
            while val != previous:
                previous = val
                val = re_scripts.sub('', val)
            if attr in url_attrs:
                val = urljoin(base_url, val)  # resolve to an absolute URL
            cleaned[attr] = val
        tag.attrs = cleaned

    return soup.decode_contents()
# Example invocation, executed whenever the module is loaded.  The sample
# value is percent-encoded and is passed through without URL-decoding.
clean_url = sanitize_html(
    '%3Cnav+ondrag=document.body.outerHTML=/XSS3D/%20&refresh=%3Etest',
    'https://localhost:8000/tool_runner/index?refresh=refresh&tool_id=',
)