Skip to content

Commit

Permalink
Merge pull request #129 from aau-network-security/feature/ct-logs-#94
Browse files Browse the repository at this point in the history
Feature/ct logs #94
  • Loading branch information
gianmarcomennecozzi authored Apr 23, 2020
2 parents 7b1a62f + 9ce2e09 commit 6c11448
Show file tree
Hide file tree
Showing 7 changed files with 478 additions and 7 deletions.
19 changes: 13 additions & 6 deletions richkit/analyse/analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def get_nld(domain, n):
:param n: Label number (int)
:return: Effective N'th-level Domain
"""

if abs(n) == 1:
nld = get_tld(domain)
elif len(domain.split('.')) <= n:
Expand All @@ -95,6 +96,7 @@ def get_n_label(domain, n):
:param n: Label number (int)
:return: Effective N'th-level label
"""

if abs(n) == 1:
n_label = get_tld(domain)
elif abs(n) == 2:
Expand All @@ -121,11 +123,11 @@ def get_domain_name_features(domain):
"""
domain_array = domain.split('.')
num_tokens = len(domain_array)
len2ld = len(get_sld(domain))
len2ld = len(get_sld(domain))
len_domain = sum([len(el) for el in domain_array])
domain_name_features = {
"num_tokens": str(num_tokens),
"len2ld": str(len2ld),
"len2ld" : str(len2ld),
"len_domain": str(len_domain)
}
return domain_name_features
Expand Down Expand Up @@ -166,6 +168,7 @@ def get_grams_alexa_2ld(domain, analyzer='char', ngram_range=(3, 5), is_test=Fal
counts_matrix = alexa_vc.fit_transform(alexa_slds)
alexa_counts = np.log10(counts_matrix.sum(axis=0).getA1())
grams_alexa2ld = ngram_count(get_sld(domain), alexa_counts, alexa_vc)

return float(grams_alexa2ld)


Expand All @@ -183,6 +186,7 @@ def get_grams_dict_2ld(domain, is_test=False):
counts_matrix = dict_vc.fit_transform(words)
dict_counts = np.log10(counts_matrix.sum(axis=0).getA1())
grams_dict2ld = ngram_count(get_sld(domain), dict_counts, dict_vc)

return float(grams_dict2ld)


Expand All @@ -202,6 +206,7 @@ def get_num_of_vowels_2ld(domain):
:param domain:
:return: number of counts: vowels in 2ld
"""

sld = get_sld(domain)
vowels = list("aeiouy")
return str(sum([sld.count(c) for c in vowels]))
Expand Down Expand Up @@ -265,19 +270,21 @@ def ngram_count(domain, counts, counts_vc):
return str(match[0])


def get_num_numeric_2ld(domain):
    """
    Count the decimal digits in the given string.

    Every character of the argument is inspected as-is; this function does
    not extract the 2LD from the argument itself.

    :param domain: string to inspect
    :return: number of digit characters, as a string
    """
    return str(sum(1 for c in domain if c.isdigit()))


def get_radio_numeric_2ld(domain):
    """
    Ratio between the digit count of the domain and the length of its 2LD.

    The numerator is get_num_numeric_2ld(domain); the denominator is
    len(get_sld(domain)).

    :param domain: domain name
    :return: the ratio as a string-formatted float; "0.0" when the 2LD is
        empty, so that no division by zero is attempted
    """
    sld_length = len(get_sld(domain))
    if sld_length == 0:
        # An empty 2LD would otherwise raise ZeroDivisionError
        return str(0.0)
    return str(float(get_num_numeric_2ld(domain)) / float(sld_length))
95 changes: 95 additions & 0 deletions richkit/retrieve/cert_sh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import requests
import json
import logging
from richkit.retrieve.x509 import X509
from datetime import datetime

logger = logging.getLogger(__name__)


class DomainCertificates:
    """
    Retrieve the certificates known to crt.sh for a domain and build a
    feature dictionary for each of them.
    """

    # Website used to retrieve the certificates belonging to a domain
    crtSH_url = "https://crt.sh/{}"

    # Seconds to wait for crt.sh before giving up; without a timeout
    # requests.get() may block indefinitely on an unresponsive server
    _request_timeout = 30

    # Layout of the not_before / not_after timestamps parsed below
    _date_format = "%Y-%m-%dT%H:%M:%S"

    def __init__(self, domain):
        """
        Fetch the certificates of the given domain.

        :param domain: domain to analyze
        :raises Exception: propagated from get_certificates() on failure
        """
        self.domain = domain
        self.certificates = self.get_certificates(self.domain)
        # Filled in by get_all() / get_certificates_list()
        self.certificates_features = None

    def get_certificates(self, domain):
        """
        Query crt.sh for the given domain and return the decoded JSON body.

        :param domain: the chosen domain
        :return: the parsed JSON response
        :raises Exception: "Server not available" on a non-200 status,
            "Domain not found" on an empty result; any other error raised
            while requesting or decoding is logged and re-raised
        """
        try:
            r = requests.get(
                self.crtSH_url.format("?q=" + domain + "&output=json"),
                timeout=self._request_timeout,
            )
            if r.status_code != 200:
                raise Exception("Server not available")
            certificates = json.loads(r.content.decode('utf-8'))
            # An empty JSON container means nothing was found for the domain
            if not certificates:
                raise Exception("Domain not found")
            return certificates
        except Exception as e:
            logger.error('Error while retrieving certificates: %s', e)
            # Bare raise keeps the original traceback intact
            raise

    def _validity_days(self, not_before, not_after):
        """Return the number of days between the two timestamp strings."""
        start = datetime.strptime(not_before, self._date_format)
        end = datetime.strptime(not_after, self._date_format)
        return (end.date() - start.date()).days

    def get_all(self):
        """
        Build, for every certificate of the domain, a dictionary holding the
        crt.sh fields plus the features taken from its X509 object.

        The result is also stored in self.certificates_features.

        :return: list of feature dictionaries
        """
        # NOTE(review): an earlier comment here mentioned filtering out rows
        # containing "@" (email entries, e.g. https://crt.sh/?id=34083306),
        # but no such filtering is implemented.
        certs_features = []
        for cert in self.certificates:
            cf = X509(cert.get('id'))
            not_before = cert.get('not_before')
            not_after = cert.get('not_after')
            certs_features.append({
                'ID': cert.get('id'),
                'Issuer': cert.get('issuer_name'),
                'Algorithm': cf.algorithm,
                'ValidationL': cf.policy_list,
                'NotBefore': not_before,
                'NotAfter': not_after,
                'Validity': self._validity_days(not_before, not_after),  # days
                'SANFeatures': cf.certificates_features,
            })
        self.certificates_features = certs_features
        return certs_features

    def get_certificates_list(self):
        """
        Build, for every certificate of the domain, a dictionary holding only
        the fields returned by crt.sh (no X509 object is created).

        The result is also stored in self.certificates_features.

        :return: list of feature dictionaries
        """
        # NOTE(review): an earlier comment here mentioned filtering out rows
        # containing "@" (email entries, e.g. https://crt.sh/?id=34083306),
        # but no such filtering is implemented.
        certs_features = []
        for cert in self.certificates:
            not_before = cert.get('not_before')
            not_after = cert.get('not_after')
            certs_features.append({
                'ID': cert.get('id'),
                'Issuer': cert.get('issuer_name'),
                'NotBefore': not_before,
                'NotAfter': not_after,
                'Validity': self._validity_days(not_before, not_after),  # days
            })
        self.certificates_features = certs_features
        return certs_features
38 changes: 38 additions & 0 deletions richkit/retrieve/ctlogs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import logging

from richkit.retrieve.cert_sh import DomainCertificates
from richkit.retrieve.x509 import X509


def get_logs(domain):
    """
    Get a list of certificates with all the features

    :param domain: Input domain
    :return: the value of DomainCertificates(domain).get_all(), or None
        when any exception is raised along the way
    """
    try:
        return DomainCertificates(domain).get_all()
    except Exception as e:
        # Best-effort wrapper: report through logging instead of stdout and
        # signal the failure to the caller with an explicit None
        logging.getLogger(__name__).error(
            'Error while retrieving certificate logs for %s: %s', domain, e
        )
        return None


def get_certificates(domain):
    """
    Get just the list of certificates of the domain

    :param domain: Input domain
    :return: the value of
        DomainCertificates(domain).get_certificates_list(), or None when
        any exception is raised along the way
    """
    try:
        return DomainCertificates(domain).get_certificates_list()
    except Exception as e:
        # Best-effort wrapper: report through logging instead of stdout and
        # signal the failure to the caller with an explicit None
        logging.getLogger(__name__).error(
            'Error while retrieving certificates for %s: %s', domain, e
        )
        return None


def get_certificates_features(cert_id):
    """
    Get the certificate features by certificate ID

    :param cert_id: crt.sh certificate ID
    :return: the certificates_features attribute of X509(cert_id), or None
        when any exception is raised along the way
    """
    try:
        return X509(cert_id).certificates_features
    except Exception as e:
        # Best-effort wrapper: report through logging instead of stdout and
        # signal the failure to the caller with an explicit None
        logging.getLogger(__name__).error(
            'Error while retrieving features of certificate %s: %s', cert_id, e
        )
        return None
2 changes: 1 addition & 1 deletion richkit/retrieve/dns.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dns import reversename
from dns import resolver
from dns import reversename
import logging

logger = logging.getLogger(__name__)
Expand Down
Loading

0 comments on commit 6c11448

Please sign in to comment.