Skip to content

Commit

Permalink
Merge branch 'ct-logs-main' of github.com:aau-network-security/richki…
Browse files Browse the repository at this point in the history
…t into ct-logs-main
  • Loading branch information
gianmarcomennecozzi committed Apr 23, 2020
2 parents a84e8e0 + 9610e5b commit 9ce2e09
Show file tree
Hide file tree
Showing 28 changed files with 635 additions and 199 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.pytest_cache
.githooks
.docs
.github/logo
.github/workflows

142 changes: 142 additions & 0 deletions .githooks/check-branch-name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import argparse
import re
import sys


def check(name):
    """Validate a git branch name against the gitflow naming conventions.

    This is the main entry point of the module. Returns True when `name`
    is acceptable; otherwise prints an error message and returns False.
    """
    # Branches that exist on the first level only
    top_level_branches = ('master', 'develop')
    if name in top_level_branches:
        return True

    # Exactly one `/` means a two-level name such as `feature/...`
    if name.count('/') == 1:
        return checkSecondLevel(name)

    # Anything else is not a recognised layout
    print(f'Error: Did not recognise "{name}" as a valid branch.')
    return False


def checkLen(string, min_len, max_len):
    """Check that the length of `string` is within `min_len`..`max_len`.

    Both bounds are inclusive. Prints an error message and returns False
    when the string is too short or too long, otherwise returns True.
    """
    length = len(string)

    if length < min_len:
        complaint = (
            f'Error: {string} is too short'
            f' (it is {length}, minimum is {min_len})'
        )
        print(complaint)
        return False

    if length > max_len:
        complaint = (
            f'Error: {string} is too long'
            f' (it is {length}, maximum is {max_len})'
        )
        print(complaint)
        return False

    return True


def checkSecondLevel(name):
    """Check a two-level gitflow branch name of the form `category/label`.

    Intended for internal use: it assumes that exactly one `/` is present
    in `name`. Returns True for a valid name; otherwise returns False
    (an error message is printed here or by the label check).
    """
    category, label = name.split('/')

    # Release branch names are not currently validated any further
    if category == 'release':
        return True

    # Categories whose label must follow the description-#issue scheme
    if category in ('feature', 'hotfix'):
        return checkLabel(label)

    print(f'Error: Did not recognise "{category}" as a valid category')
    return False


def checkLabel(label):
    """Check the label part of a `feature/` or `hotfix/` branch name.

    A valid label is a description of one or more words (lowercase
    alphanumerics) joined by dashes (`-`), followed by a dash and an
    issue reference (`#` and an integer). The description must be
    between 10 and 25 characters long.

    Example: word-and-numb3r-#1

    Prints an error message and returns False at the first problem
    found; returns True when no problem is found.
    """
    # Description: one or more words, anchored at the beginning
    desc_re = r'^(?P<description>[a-z0-9]+(?:-[a-z0-9]+)*)'
    # Issue reference: hashtag and integer, anchored at the end
    issue_re = r'(?P<issue>#[0-9]+)$'

    m = re.search(desc_re, label)
    if not m:
        print(
            f'Error: No valid description in "{label}"'
            f' (Expected it to start with lowercase alphanumeric and dashes'
            f' like this: ex4mple-description)'
        )
        return False

    # The match is greedy, so this is the longest valid description prefix
    if not checkLen(m.group('description'), 10, 25):
        return False

    if not re.search(issue_re, label):
        print(
            f'Error: No issue reference in "{label}"'
            f' (Expected it to end like this: ...-#1)'
        )
        return False

    # Description and issue reference must be joined by exactly one dash
    # and together make up the whole label
    label_re = desc_re + r'-' + issue_re
    if not re.search(label_re, label):
        print(
            f'Error: Missing dash between description and issue reference'
            f' in "{label}"'
        )
        return False

    return True  # no problems found


if __name__ == "__main__":
    # Command line entry point: validate NAME and report the result via
    # the exit status (0 = valid, 1 = invalid), or run the self-tests.

    parser = argparse.ArgumentParser(
        description='Validate branch name according to gitflow',
    )
    parser.add_argument(
        '-t', '--test', dest='test', action='store_true',
        help='Run the built in tests and exit',
    )
    # NAME is optional at the parser level so that `--test` can be used on
    # its own; it is enforced below when not running the self-tests.
    parser.add_argument(
        'name', metavar='NAME', type=str, nargs='?',
        help='The branch name to check (required unless --test is given)'
    )
    args = parser.parse_args()

    if not args.test:
        if args.name is None:
            # Prints usage and exits with status 2, like argparse itself
            parser.error('the following arguments are required: NAME')
        success = check(args.name)
        sys.exit(0 if success else 1)

    print('Starting built-in self-testing')
    print('Expect error messages, but not AssertionError\'s')
    assert check('master')
    assert check('develop')
    assert not check('random')  # no custom at top level
    assert not check('alkshjdg')  # no custom at top level
    assert not check('master/asdasdasdasdasdasd')  # nothing below master
    assert not check('develop/asdasdasdasdasdas')  # nothing below develop
    assert check('feature/some-feature-#9')  # good
    assert check('hotfix/fix-the-thing-#12')  # good
    assert check('release/1.0.0')  # release names are not validated
    assert not check('bugfix/some-feature-#9')  # unknown category
    assert not check('feature/2-shrt-fe#1')  # too short
    assert not check('feature/very-long-description-here-#1')  # too long
    assert not check('feature/some-feature#9')  # missing dash
    assert not check('feature/some-feature-9')  # missing issue reference
    print('Done - either all tests passed or you disable `assert`')
25 changes: 25 additions & 0 deletions .githooks/pre-commit.linux.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
#
# Git pre-commit hook: formats the Python sources with autopep8 and then
# validates the current branch name with .githooks/check-branch-name.py.
# Exits non-zero (which aborts the commit) when autopep8 is missing or the
# branch name is rejected.

## Would be nice to have linting before commit
# Only the presence of the executable is checked: the hints below also
# recommend apt/brew installs, which `pip3 list` does not necessarily show.
if ! command -v autopep8 >/dev/null 2>&1
then
    echo 'autopep8 is NOT installed, linting test may fail on CI ... '
    echo 'consider to install autopep8, you may use following commands: '
    echo 'Debian: [ sudo apt-get install -y python-autopep8 ] '
    echo 'MacOS: [ brew install autopep8 ]'
    echo 'You may consider to install it into virtual environment of your project:'
    echo 'source venv/bin/activate'
    echo 'pip3 install autopep8'
    echo 'autopep8 should be available in your system, to do not face with linting problem.'
    exit 1
fi

echo 'Linting...'
echo 'Going to root directory of the project'
# Ask git for the work tree root instead of assuming the checkout directory
# name, so the relative .githooks/ path below is always valid.
repo_root="$(git rev-parse --show-toplevel)" || exit 1
cd "$repo_root" || exit 1
autopep8 --in-place --recursive --max-line-length=100 --exclude docs/source/conf.py,venv,__pycache__,old,build,dist .

python3 .githooks/check-branch-name.py "$(git rev-parse --abbrev-ref HEAD)"
exit $?
30 changes: 30 additions & 0 deletions .github/local-test/run-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env sh
#
# Runs the flake8 lint gate and the test suite against the sources in
# /richkit. The exit status is 1 when the lint gate fails, otherwise it is
# the exit status of the test run.

NC='\033[0m'
RED='\033[0;31m'
ORANGE='\033[0;33m'
GREEN='\033[0;32m'

# say COLOUR MESSAGE...
# Prints MESSAGE in COLOUR and resets the colour afterwards. printf '%b' is
# used because whether `echo` expands the \033 escapes depends on the shell.
say() {
    colour=$1
    shift
    printf '%b%s%b\n' "$colour" "$*" "$NC"
}

if [ "$MAXMIND_LICENSE_KEY" = "" ] ; then
    say "$ORANGE" " Warning: Environment variable for MAXMINDDB could not be found, proceeding without it, check README file "
fi

# change directory to /richkit; abort rather than lint/test some other tree
cd /richkit || exit 1

say "$GREEN" "1. Checking flake8 linting ... "
# test that number of violations does not increase
FLAKE8_ERROR_CNT=$(flake8 . -qq --count --exit-zero --max-complexity=10 --max-line-length=127 --exclude venv,__pycache__,docs/source/conf.py,old,build,dist)
FLAKE8_ERROR_LIMIT=25

# An empty or non-numeric count (e.g. flake8 failed to start) would make the
# numeric comparison below error out and silently pass the gate.
case "$FLAKE8_ERROR_CNT" in
    ''|*[!0-9]*)
        say "$RED" "Failed because flake8 did not report a number of errors (got: '$FLAKE8_ERROR_CNT')" 1>&2
        exit 1
        ;;
esac

if [ "$FLAKE8_ERROR_CNT" -gt "$FLAKE8_ERROR_LIMIT" ] ; then
    say "$RED" "Failed because the number of errors from flake8 increased (This: $FLAKE8_ERROR_CNT Previously: $FLAKE8_ERROR_LIMIT)" 1>&2
    exit 1
fi
say "$ORANGE" "Number of validation errors from flake8 is: $FLAKE8_ERROR_CNT (Limit is: $FLAKE8_ERROR_LIMIT)"


say "$GREEN" "2. Testing module .... "
printf '\n'
# Last command: its exit status becomes the exit status of this script
coverage run --source=richkit -m pytest -Werror /richkit/richkit

22 changes: 19 additions & 3 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,30 @@ jobs:
- name: Check that number of pep8 violations is not going up
run: |
# test that number of violations does not increase
FLAKE8_ERROR_CNT=$(flake8 . -qq --count --exit-zero --max-complexity=10 --max-line-length=127)
FLAKE8_ERROR_LIMIT=308
FLAKE8_ERROR_CNT=$(flake8 . -qq --count --exit-zero --max-complexity=10 --max-line-length=127 --exclude venv,__pycache__,docs/source/conf.py,old,build,dist)
FLAKE8_ERROR_LIMIT=25
if [ "$FLAKE8_ERROR_CNT" -gt "$FLAKE8_ERROR_LIMIT" ] ; then
echo "Failed because the number of errors from flake8 increased (This: $FLAKE8_ERROR_CNT Previously: $FLAKE8_ERROR_LIMIT)" 1>&2
false
fi
echo "Number of validation errors from flake8 is: $FLAKE8_ERROR_CNT (Limit is: $FLAKE8_ERROR_LIMIT)"
formalities:
  # Checks repository conventions that are independent of the code itself;
  # currently only the gitflow branch name.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v1
    # This job has no build matrix, so the version is spelled out here
    - name: Set up Python 3.7
      uses: actions/setup-python@v1
      with:
        # Quoted so YAML does not read the version as a float
        python-version: "3.7"
    - name: Extract branch name
      shell: bash
      # GITHUB_HEAD_REF is only set for pull request events; otherwise the
      # branch is taken from GITHUB_REF. NOTE(review): the triggers of this
      # workflow are not visible here - confirm which events apply.
      # The value is exported through the GITHUB_ENV file, which replaces
      # the deprecated `::set-env` workflow command.
      run: echo "BRANCH_NAME=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_ENV"
    - name: Check branch name
      run: |
        echo "Checking ${BRANCH_NAME}..."
        python3 .githooks/check-branch-name.py "$BRANCH_NAME"
test:
runs-on: ${{ matrix.os }}
strategy:
Expand All @@ -59,7 +75,7 @@ jobs:
env:
MAXMIND_LICENSE_KEY: ${{ secrets.MAXMIND_LICENSE_KEY }}
run: |
coverage run --source=richkit -m pytest -Werror
coverage run --source=richkit -m pytest -Werror --ignore src/python-whois
- name: Coverage report
run: |
coverage report --fail-under=79
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pythonpublish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PIP_TOKEN }}
run: |
python setup.py sdist bdist_wheel
twine upload --repository-url https://test.pypi.org/legacy/ dist/*
## Initialize release process && use TestPyPi to perform and check upload first before uploading to pypi.org
twine upload dist/*
## Initialize release process
- name: Checkout code
uses: actions/checkout@master
- name: Create Release
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@ coverage.xml
richkit/test/.DS_Store
# Local data
richkit/retrieve/data/*.txt
richkit/test/analyse/data/*.csv
# Translations
*.mo
*.pot

categories_list.txt
# Django stuff:
*.log
local_settings.py
Expand Down
22 changes: 22 additions & 0 deletions Dockerfile.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Image that runs the Richkit lint check and test suite (run-test.sh).
#
# The base image is pinned so that rebuilding does not silently switch the
# Python version under the pinned packages in requirements.txt.
# NOTE(review): 18.04 is assumed to match what the unpinned `ubuntu` tag
# resolved to when this file was written - confirm before changing it.
FROM ubuntu:18.04

# provide environment variable as MAXMIND_LICENSE_KEY
# when you run docker image see readme


# git is required because requirements.txt installs python-whois from a git
# URL; DEBIAN_FRONTEND avoids interactive prompts during the image build.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y python3 python3-pip git \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt /richkit/requirements.txt

COPY richkit /richkit/richkit


RUN pip3 install -r /richkit/requirements.txt

RUN pip3 install coverage pytest sphinx flake8

COPY .github/local-test/run-test.sh /richkit/richkit/run-test.sh

# Run through `sh` so the image does not depend on the script's executable
# bit being set in the checkout it was copied from.
CMD ["sh", "/richkit/richkit/run-test.sh"]


28 changes: 27 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ Documentation can be found at https://richkit.readthedocs.io/en/latest/.

To install richkit, just type `pip install richkit` in the terminal.


## Usage

The following codes can be used to retrieve the TLD and the URL category, respectively.
Expand Down Expand Up @@ -89,6 +88,33 @@ Richkit define a set of functions categorized by the following modules:

- `richkit.retrieve`: This module provides the ability to retrieve data on domain names of any sort. It comes without the "confidentiality contract" of `richkit.lookup`.

## Run Tests on Docker

To avoid environment-related problems, we provide a `Dockerfile.test` file which builds a Docker image for running the Richkit tests.

- The only thing you need to supply is the `MAXMIND_LICENSE_KEY` environment variable, which `.github/local-test/run-test.sh` reads when the container starts. It is required to pass the test cases for the `lookup` module.

Commands to run the tests in a Docker environment:

- `docker build -t richkit-test -f Dockerfile.test . ` : Builds required image to run test cases

- `docker run -e MAXMIND_LICENSE_KEY="<license-key>" richkit-test` : Runs the `run-test.sh` file in the Docker image.


## Contributing

Contributions are most welcome.

We use the [gitflow](https://www.atlassian.com/git/tutorials/comparing-workflows/gitflow-workflow)
branching strategy, so if you plan to push a branch to this repository,
please follow it. Note that we test branch names with
`.githooks/check-branch-name.py`. A git pre-commit hook can be used
to run this check automatically on every commit. A ready-to-use example
hook for Linux is included, and can be enabled like
this (assuming `python>=3.6` and `bash`):

ln -s $(pwd)/.githooks/pre-commit.linux.sample $(pwd)/.git/hooks/pre-commit

## Credits

- Logo designed by [independenthand](https://www.behance.net/independenthand)
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ maxminddb
numpy==1.17.2
scikit-learn==0.21.3
langid==1.1.6
urllib3==1.25.6
bs4==0.0.1
lxml==4.4.1
requests==2.22.0
whois==0.9.3
# when this branch is merged into upstream and released
-e git://github.com/aau-network-security/pywhois.git@release-for-richkit#egg=python-whois
# replace the line with:
# python-whois
Loading

0 comments on commit 9ce2e09

Please sign in to comment.