-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutil.py
137 lines (106 loc) · 4.12 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import ipaddress
import json
import re
import urllib.error
import urllib.request
# Constants
# RE_URL: FQDN-like tokens — optionally preceded by an HTML-tag-ish prefix
# (`<.*?>.`) or a leading '*' wildcard label — ending in a 2+ letter TLD,
# with an optional trailing path. The (?!md[#)]) lookahead rejects tokens
# whose final label is 'md' followed by '#' or ')' (markdown-file links).
RE_URL = re.compile(r"((?:<.*?>\.)?(?:(?:\*\.?)?[A-Za-z0-9\-]+\.)+(?!md[#)])[a-z]{2,})(?:/.*?(?:\s|$))?")
# RE_IPV6: IPv6 networks (two or more colon groups); the /prefix is mandatory.
RE_IPV6 = re.compile(r"(\b(?:[0-9a-f]+:){2,}(?::|[0-9a-fA-F]{1,4})/\d{1,3})")
# RE_IPV4: dotted-quad IPv4 address with an optional /CIDR suffix.
RE_IPV4 = re.compile(r"(\b(?:\d{1,3}\.){3}\d{1,3}(?:/\d{1,2})?\b)")
def write_list(directory, filename, items):
    """Write each entry of *items* to ``directory/filename``, one per line.

    The file is created/overwritten only when *items* is non-empty, so an
    empty result set leaves no stale file behind.

    Args:
        directory: pathlib.Path of the target directory.
        filename: name of the file to create inside *directory*.
        items: iterable of values to write (one line each).
    """
    if items:
        # Explicit UTF-8 so output does not depend on the platform locale.
        with open(directory / filename, "w", encoding="utf-8") as f:
            f.writelines(f"{item}\n" for item in items)
def natsort_fqdn(s):
    """Natural-sort key for a (possibly wildcarded) FQDN string.

    The string is split into numeric runs and separator/label fragments;
    numeric runs compare by integer value, everything else compares as
    ``(0, text)`` so e.g. 'a2.com' sorts before 'a10.com'.
    """
    fragments = re.split(r"(\d+|\*|\.)", s)
    return [
        (int(frag), frag) if frag.isdigit() else (0, frag)
        for frag in fragments
    ]
def natsort_ip(ip_list):
    """Return *ip_list* sorted with every IPv6 network before any IPv4 one.

    Within each family the networks compare by their packed (expanded)
    network address, so IPv6 entries written with '::' compression sort by
    their full form.
    """
    def family_then_address(entry):
        net = ipaddress.ip_network(entry)
        family_rank = 0 if isinstance(net, ipaddress.IPv6Network) else 1
        return family_rank, net.network_address.packed

    return sorted(ip_list, key=family_then_address)
def get_response_data(url):
    """Fetch *url* and return the response body decoded as text (UTF-8 default).

    Raises:
        Exception: when the request fails, wrapping the underlying
        ``urllib.error.URLError``; chained with ``from`` so the root
        cause stays visible in the traceback.
    """
    try:
        with urllib.request.urlopen(url) as response:
            return response.read().decode()
    except urllib.error.URLError as e:
        # Chain the original error instead of discarding it.
        raise Exception(f"Error: {e} while fetching data from {url}") from e
def return_fqdn_no_wildcard(url_set):
    """Return *url_set* with leading wildcards stripped and other wildcards dropped.

    Entries whose only '*' is the first character ('*.example.com' or
    '*example.com') are reduced to the bare FQDN; entries with a wildcard
    anywhere else are discarded; wildcard-free entries pass through as-is.
    """
    re_wildcard_start_only = re.compile(r"^\*[^*]*$")
    urls_no_wildcard = set()
    for url in url_set:
        if re_wildcard_start_only.match(url):
            # Bug fix: the old `url[2:]` slice assumed a '*.' prefix and
            # chopped the first real character off bare '*host' entries.
            non_wildcard_url = url[2:] if url.startswith("*.") else url[1:]
            urls_no_wildcard.add(non_wildcard_url)
        elif "*" in url:
            # Discard URL with wildcard in the middle or end
            continue
        else:
            # No wildcard
            urls_no_wildcard.add(url)
    return urls_no_wildcard
def process_ips(ip_input, return_ipv6=False):
    """Filter *ip_input* down to valid, public networks of one address family.

    Args:
        ip_input: iterable of IP/CIDR strings; unparsable entries are ignored.
        return_ipv6: when True return the IPv6 matches, otherwise the IPv4 ones.

    Returns:
        set of the original strings whose networks are public and belong to
        the requested family.
    """
    wanted_family = ipaddress.IPv6Network if return_ipv6 else ipaddress.IPv4Network
    selected = set()
    for candidate in ip_input:
        try:
            net = ipaddress.ip_network(candidate)
        except ValueError:
            # Ignore invalid IP ranges
            continue
        if isinstance(net, wanted_family) and not net.is_private:
            selected.add(candidate)
    return selected
def extract_tables(input_data):
    """Return every Markdown table found in *input_data* as a list of strings.

    A table is a header row, a delimiter row, and one or more data rows,
    each line starting and ending with '|'.
    """
    table_re = re.compile(
        r"(^\|.*\|$\r?\n\|(?:\s|:)?-+.*\|(?:\r?\n\|.*\|)+)",
        re.MULTILINE,
    )
    return table_re.findall(input_data)
def md_table_to_dict(table_string):
    """Convert one Markdown table into a list of row dicts keyed by header.

    The first line supplies the keys, the second (delimiter) line is
    skipped, and the empty cells produced by each row's outer '|'
    characters are excluded.
    """
    rows = table_string.split("\n")
    headers = [cell.strip() for cell in rows[0].split("|")] if rows else []
    last = len(headers) - 1
    parsed = []
    for line in rows[2:]:
        cells = line.split("|")
        parsed.append(
            {headers[idx]: cell.strip() for idx, cell in enumerate(cells) if 0 < idx < last}
        )
    return parsed
def extract_network_item(source_list, pattern):
    """Collect all matches of *pattern* from every entry of *source_list*.

    Entries are first split on newlines and HTML ``<br>`` tags. Matches of
    the IP patterns (RE_IPV4 / RE_IPV6) are normalized through
    ``ipaddress.ip_network`` so a bare address gains its /32 or /128 suffix.
    Other (FQDN) matches are lowercased; matches that appear inside inline
    code spans (backticks) are skipped, and matches containing more than
    one '*' have every '*' removed.
    """
    # Hoisted out of the loops: the pattern identity never changes per call.
    normalize_as_ip = pattern in (RE_IPV4, RE_IPV6)
    collected = []
    for entry in source_list:
        # Multiline strings (and <br>-joined cells) become separate lines.
        for segment in re.split(r"\r?\n|<br\s*/?>", entry):
            for hit in re.findall(pattern, segment):
                if normalize_as_ip:
                    collected.append(str(ipaddress.ip_network(hit)))
                    continue
                # Edge case: skip hits wrapped in backticks (markdown code,
                # which would otherwise be caught as italicized text).
                if re.search(r"`\S*{}\S*`".format(re.escape(hit)), segment):
                    continue
                if hit.count("*") > 1:
                    hit = hit.replace("*", "")
                collected.append(hit.lower())
    return collected
def get_last_commit_date(repo, path):
    """Return the committer date of the newest commit touching *path*.

    Queries the GitHub commits API for *repo* (``owner/name`` form),
    filtered by *path*, and reads the date from the first — i.e. most
    recent — entry of the response.
    """
    api_url = f"https://api.github.com/repos/{repo}/commits?path={path}"
    commits = json.loads(get_response_data(api_url))
    newest = commits[0]
    return newest["commit"]["committer"]["date"]