Skip to content

Commit

Permalink
Move build docs functionality to workflow (upload.yml) 2 (#78)
Browse files Browse the repository at this point in the history
* upload.yml now does full docs build.

---------

Signed-off-by: Jack Luar <[email protected]>
  • Loading branch information
luarss authored Oct 27, 2024
1 parent 84bfc37 commit d040878
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: ORAssistant CI
run-name: ${{ github.actor }} started CI

on: [push, pull_request]
# on: [push, pull_request]

jobs:
build-backend-docker:
Expand Down
59 changes: 23 additions & 36 deletions .github/workflows/upload.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,52 +4,39 @@ on:
workflow_dispatch:

env:
HF_RAG_REPO: The-OpenROAD-Project/ORQA_RAG_datasets
HF_RAG_REPO: The-OpenROAD-Project/ORAssistant_RAG_Dataset
OR_COMMIT_HASH: ffc5760f2df639cd184c40ceba253c7e02a006d5
ORFS_COMMIT_HASH: b94834df01cb58915bc0e8dabf85a314fbd8fb9e
OPENSTA_COMMIT_HASH: 1c7f022cd0a02ce71d047aa3dbb64e924b6efbd5

jobs:
or-manpages:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up prerequisites
working-directory: ./backend
run: |
sudo apt-get update
sudo apt-get install -y make pandoc git
- name: Set up python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install huggingface CLI
run: |
pip install huggingface_hub[cli]
- name: Clone OpenROAD
run: |
git clone https://github.com/The-OpenROAD-Project/OpenROAD
- name: Checkout OpenROAD hash
working-directory: OpenROAD
run: |
git fetch origin ${{ env.OR_COMMIT_HASH }}
git checkout ${{ env.OR_COMMIT_HASH }}
- name: Preprocess manpages
working-directory: OpenROAD/src
run: |
for folder in $(ls -d */); do
cd $folder
../../etc/find_messages.py > messages.txt
cd ..
done
- name: Build manpages
working-directory: OpenROAD/docs
run: |
make clean && make preprocess && make doc
- name: Login to Huggingface
run: |
make init-dev
- name: Populate environment variables
working-directory: ./backend
run: |
rm -f .env && touch .env
echo "OR_REPO_COMMIT=$OR_COMMIT_HASH" >> .env
echo "ORFS_REPO_COMMIT=$ORFS_COMMIT_HASH" >> .env
echo "OPENSTA_REPO_COMMIT=$OPENSTA_COMMIT_HASH" >> .env
- name: Preprocess docs
working-directory: ./backend
run: |
. .venv/bin/activate
python build_docs.py
- name: Upload files
working-directory: ./backend
run: |
. .venv/bin/activate
huggingface-cli login --token ${{ secrets.HF_TOKEN }}
- name: Upload manpages
working-directory: OpenROAD/docs
run: |
huggingface-cli upload ${{ env.HF_RAG_REPO }} ./md /manpages --repo-type dataset
- name: Logout from Huggingface
run: |
huggingface-cli upload ${{ env.HF_RAG_REPO }} ./data / --repo-type dataset
huggingface-cli logout
72 changes: 45 additions & 27 deletions backend/build_docs.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,48 @@
import json
import os
import subprocess
import requests
import sys
import shutil
import json
import logging
import sys

from shutil import copyfile
from dotenv import load_dotenv
from typing import Optional
from bs4 import BeautifulSoup
from huggingface_hub import snapshot_download


load_dotenv()
source_dict: dict[str, str] = {}
cur_dir: str = os.getcwd()

# This code must be run in ./backend
cur_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(cur_dir)

# Get commit hashes from env
or_repo_commit = os.getenv('OR_REPO_COMMIT', 'ffc5760f2df639cd184c40ceba253c7e02a006d5')
orfs_repo_commit = os.getenv(
'ORFS_REPO_COMMIT', 'b94834df01cb58915bc0e8dabf85a314fbd8fb9e'
)
opensta_repo_commit = os.getenv(
'OPENSTA_REPO_COMMIT', '1c7f022cd0a02ce71d047aa3dbb64e924b6efbd5'
)

or_docs_url = 'https://openroad.readthedocs.io/en/latest'
orfs_docs_url = 'https://openroad-flow-scripts.readthedocs.io/en/latest'
opensta_docs_url = 'https://github.com/The-OpenROAD-Project/OpenSTA/raw/1c7f022cd0a02ce71d047aa3dbb64e924b6efbd5/doc/OpenSTA.pdf'
opensta_docs_url = (
'https://github.com/The-OpenROAD-Project/OpenSTA/raw/'
f'{opensta_repo_commit}/doc/OpenSTA.pdf'
)
yosys_html_url = 'https://yosyshq.readthedocs.io/projects/yosys/en/latest'
klayout_html_url = 'https://www.klayout.de/doc.html'
or_website_url = 'https://theopenroadproject.org/'
opensta_readme_url = (
'https://raw.githubusercontent.com/The-OpenROAD-Project/OpenSTA/master/README.md'
'https://raw.githubusercontent.com/The-OpenROAD-Project/OpenSTA/'
f'{opensta_repo_commit}/README.md'
)
or_publications_url = 'https://theopenroadproject.org/publications/'

logging.basicConfig(level=os.environ.get('LOGLEVEL', 'INFO').upper())

Expand All @@ -38,12 +57,9 @@ def update_src(src_path: str, dst_path: str) -> None:
f"{orfs_docs_url}/{src_path.split('_sources/')[-1].replace('.md', '.html')}"
)
elif 'manpages' in dst_path:
manpage_path = dst_path.replace('data/markdown/', '')
commit_hash = os.getenv(
'ORQA_RAG_DATASETS_COMMIT', '470c7ecd67d3a22557500a451b73a31fc8c4ec15'
)
manpage_path = dst_path.replace('data/markdown/', 'markdown/')
source_dict[dst_path] = (
f'https://huggingface.co/datasets/The-OpenROAD-Project/ORQA_RAG_datasets/raw/{commit_hash}/{manpage_path}'
f'https://huggingface.co/datasets/The-OpenROAD-Project/ORAssistant_RAG_Dataset/raw/main/{manpage_path}'
)
elif 'yosys' in dst_path:
source_dict[dst_path] = f"https://{dst_path[len('data/html/yosys_docs') :]}"
Expand Down Expand Up @@ -347,8 +363,9 @@ def get_or_website_html() -> None:


def get_or_publications() -> None:
# TODO: verify that this is indeed all publications. The new format seems to truncate to the 10 latest.
try:
html = requests.get('https://theopenroadproject.org/publications/').text
html = requests.get(or_publications_url).text
soup = BeautifulSoup(html, 'lxml')
links = soup.find_all('a')
papers = []
Expand Down Expand Up @@ -416,14 +433,7 @@ def get_klayout_docs_html() -> None:

if __name__ == '__main__':
logging.info('Building knowledge base...')
docs_paths = [
'data/markdown/manpages',
'data/markdown/OR_docs',
'data/markdown/ORFS_docs',
'data/markdown/OpenSTA_docs',
'data/pdf',
'data/html',
]
docs_paths = ['data']
purge_folders(folder_paths=docs_paths)

os.makedirs('data/markdown/manpages', exist_ok=True)
Expand All @@ -448,16 +458,12 @@ def get_klayout_docs_html() -> None:

clone_repo(
url='https://github.com/The-OpenROAD-Project/OpenROAD.git',
commit_hash=os.getenv(
'OR_REPO_COMMIT', 'ffc5760f2df639cd184c40ceba253c7e02a006d5'
),
commit_hash=or_repo_commit,
folder_name='OpenROAD',
)
clone_repo(
url='https://github.com/The-OpenROAD-Project/OpenROAD-flow-scripts.git',
commit_hash=os.getenv(
'ORFS_REPO_COMMIT', 'b94834df01cb58915bc0e8dabf85a314fbd8fb9e'
),
commit_hash=orfs_repo_commit,
folder_name='OpenROAD-flow-scripts',
)

Expand All @@ -473,9 +479,21 @@ def get_klayout_docs_html() -> None:

os.remove(f'{cur_dir}/data/markdown/OR_docs/installation/MessagesFinal.md')

gh_disc_src_json = open(f'{cur_dir}/data/markdown/gh_discussions/mapping.json', 'r')
gh_disc_src = json.load(gh_disc_src_json)
snapshot_download(
repo_id='The-OpenROAD-Project/ORAssistant_RAG_Dataset',
repo_type='dataset',
revision='main',
allow_patterns=[
'markdown/gh_discussions/**/*',
'markdown/gh_discussions/*',
],
local_dir='data',
)

with open(f'{cur_dir}/data/markdown/gh_discussions/mapping.json') as gh_disc:
gh_disc_src = json.load(gh_disc)
gh_disc_path = 'data/markdown/gh_discussions'
source_dict = {}
for file in gh_disc_src.keys():
full_path = os.path.join(gh_disc_path, file)
source_dict[full_path] = gh_disc_src[file]['url']
Expand Down

0 comments on commit d040878

Please sign in to comment.