diff --git a/.github/workflows/crawl.yaml b/.github/workflows/crawl.yaml new file mode 100644 index 0000000..1b92ac3 --- /dev/null +++ b/.github/workflows/crawl.yaml @@ -0,0 +1,42 @@ +name: Crawl Workflow + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Install dependencies + run: | + python -m venv .env + source .env/bin/activate + pip install -r requirements.txt + + - name: Test crawler and indexer + run: | + source .env/bin/activate + python3 -m src.phantom --num_threads 2 --urls "https://github.com/AnsahMohammad" "https://github.com/AnsahMohammad" --sleep 6 + + python3 -m nltk.downloader stopwords + python3 -m nltk.downloader punkt + + python3 -m src.phantom_indexing + + echo "Crawling completed" + + - name: test search Engine server + run: | + source .env/bin/activate + python3 phantom.py & + sleep 10 + curl -f http://localhost:5000/ + echo "Flask test completed" diff --git a/crawl.sh b/crawl.sh index adafdca..c56888d 100755 --- a/crawl.sh +++ b/crawl.sh @@ -2,4 +2,4 @@ python3 -m venv .env source .env/bin/activate pip install -r requirements.txt -python3 -m src.phantom --num_threads 10 --urls "https://www.geeksforgeeks.org/" "https://stackoverflow.com/questions" --show_logs True --print_logs True --sleep 60 +python3 -m src.phantom --num_threads 10 --urls "https://www.geeksforgeeks.org/" "https://stackoverflow.com/questions" "https://en.wikipedia.org/wiki/India" "https://developers.cloudflare.com/" --show_logs True --print_logs True --sleep 600 diff --git a/phantom.py b/phantom.py index 6ab58c5..e775a6a 100644 --- a/phantom.py +++ b/phantom.py @@ -18,8 +18,6 @@ def home(): def process_input(input_text): result = engine.query(input_text, count=20) #(doc, score, title) - print("results ; \n\n") - print(result) return result if __name__ == '__main__':