Skip to content

Commit

Permalink
Merge branch 'main' into add_credits_to_ubc_and_university_of_wiscons…
Browse files Browse the repository at this point in the history
…in_in_readme
  • Loading branch information
SoloSynth1 committed Jun 25, 2024
2 parents 3d56d69 + 70e574e commit a28fef1
Show file tree
Hide file tree
Showing 292 changed files with 1,420 additions and 46,503 deletions.
2 changes: 2 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Settings read by coverage.py (this file is .coveragerc).
[run]
# Leave files under tests/ out of the coverage measurement.
omit = tests/*
78 changes: 78 additions & 0 deletions .github/workflows/ci-full.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Full CI pipeline: runs pytest with the OpenAI API key available, builds the
# wheel, then checks in a second job that the built wheel can be pip-installed.
name: ci-full

on:
  # Only pushes to the protected branch, i.e. `main`, should trigger this flow.
  # Enabling this flow on pushes to **unprotected** branches or on Pull
  # Requests to **any** branch risks exposing the secrets to unapproved
  # changes (effectively RCE).
  push:
    branches:
      - main

jobs:

  test-then-build-wheel:
    # Set up operating system
    runs-on: ubuntu-latest

    # Hand the built wheel's file name to the downstream install job.
    outputs:
      wheelfile: ${{ steps.wheel-name.outputs.WHEELFILE }}

    # NOTE(review): presumably the environment that holds OPENAI_API_KEY;
    # confirm in the repository settings.
    environment: integration-testing

    # Define job steps
    steps:
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version stays a string.
          python-version: "3.12"

      - name: Check-out repository
        uses: actions/checkout@v3

      - name: Install poetry
        uses: snok/install-poetry@v1

      - name: Install package
        run: poetry install

      - name: Install pandoc
        run: sudo apt-get update && sudo apt-get install -y pandoc

      - name: Install tectonic
        run: sudo snap refresh && sudo snap install tectonic

      - name: Test with pytest
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        # No marker filter here, so every collected test runs.
        run: poetry run pytest --cov

      - name: Test package building process
        # Start from an empty dist/ so it holds only the freshly built wheel.
        run: rm -rf ./dist && poetry build --format wheel --output dist

      - name: Get built wheel path
        id: wheel-name
        # Relies on dist/ containing exactly one file (see the previous step).
        run: echo "WHEELFILE=$(ls dist)" >> "$GITHUB_OUTPUT"

      - name: Upload test build as artifact
        uses: actions/upload-artifact@v4
        with:
          name: test-wheel
          path: dist/${{ steps.wheel-name.outputs.WHEELFILE }}

  wheel-install-test:
    runs-on: ubuntu-latest
    needs: [test-then-build-wheel]
    steps:
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.12"

      - name: Download built wheel
        uses: actions/download-artifact@v4
        with:
          name: test-wheel

      - name: Test pip install local wheel
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the file name is never parsed as shell syntax.
          WHEELFILE: ${{ needs.test-then-build-wheel.outputs.wheelfile }}
        run: pip install -U wheel "$WHEELFILE"
41 changes: 37 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
name: ci

on: [push, pull_request]
on: [push]

jobs:

ci:
unit-test-then-build-wheel:
# Set up operating system
runs-on: ubuntu-latest

outputs:
wheelfile: ${{ steps.wheel-name.outputs.WHEELFILE }}

# Define job steps
steps:
- name: Set up Python
Expand All @@ -30,5 +33,35 @@ jobs:
- name: Install tectonic
run: sudo snap refresh && sudo snap install tectonic

- name: Test with pytest
run: poetry run pytest tests
- name: Test with pytest (skip integration tests)
run: poetry run pytest --cov -m "not integration"

- name: Test package building process
run: rm -rf ./dist && poetry build --format wheel --output dist

- name: Get built wheel path
id: wheel-name
run: echo "WHEELFILE=$(ls dist)" >> "$GITHUB_OUTPUT"

- name: Upload test build as artifact
uses: actions/upload-artifact@v4
with:
name: test-wheel
path: dist/${{ steps.wheel-name.outputs.WHEELFILE }}

wheel-install-test:
runs-on: ubuntu-latest
needs: [unit-test-then-build-wheel]
steps:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.12"

- name: Download built wheel
uses: actions/download-artifact@v4
with:
name: test-wheel

- name: Test pip install local wheel
run: pip install -U wheel ${{needs.unit-test-then-build-wheel.outputs.wheelfile}}
13 changes: 12 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
draft/*
**/*~
.env
/data/

# vim swap files
**/*~
Expand All @@ -12,4 +13,14 @@ draft/*
.idea/

# pycache
**/__pycache__/
**/__pycache__/

# built package
dist/

# pytest-cov coverage files
.coverage*

# quarto github page related
/.quarto/
/_site/
38 changes: 38 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Reference:
# https://swcarpentry.github.io/make-novice/02-makefiles.html
# https://ubc-dsci.github.io/reproducible-and-trustworthy-workflows-for-data-science/materials/lectures/09-pipelines.html

# .PHONY tells make that 'all', 'publish' and 'clean' are not files; each is
# declared right above its own rule.
.PHONY : all
# The 'all' target is the default target. It depends on 'report/docs/index.html', which triggers the build process for this file.
all : report/docs/index.html

# Unzip batch_run.zip
# The three extracted directories share this one recipe.
data/batch_run/batch_run_3.5-turbo \
data/batch_run/batch_run_4-turbo \
data/batch_run/batch_run_4o :
	unzip data/batch_run/batch_run.zip -d data/batch_run/

# Preprocess
# Re-run whenever the script changes or the extracted batch-run data is newer.
data/processed/ground_truth.csv : analysis/preprocess_batch_run_result.py data/batch_run/batch_run_3.5-turbo
	python analysis/preprocess_batch_run_result.py

# Build 'report/docs/index.html' by rendering the Quarto project.
# The Quarto config and the report sources are prerequisites so that editing
# either one triggers a re-render, not only a change in the processed data.
report/docs/index.html : data/processed/ground_truth.csv _quarto.yml $(wildcard report/*.qmd)
	quarto render

# Publish the rendered site to the gh-pages branch.
.PHONY : publish
publish : data/processed/ground_truth.csv
	quarto publish gh-pages

# The 'clean' target is used to clean up generated files and directories.
.PHONY : clean
clean :
	rm -rf report/docs/*
	touch report/docs/.gitkeep
	rm -rf data/batch_run/batch_run_3.5-turbo
	rm -rf data/batch_run/batch_run_4-turbo
	rm -rf data/batch_run/batch_run_4o
	rm -rf data/processed/ground_truth.csv
	rm -rf data/processed/score_*csv

8 changes: 4 additions & 4 deletions report/final_report/_quarto.yml → _quarto.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
project:
type: website
render:
- "*qmd"
output-dir: docs
- "report/*qmd"
output-dir: report/docs

website:
sidebar:
Expand All @@ -11,9 +11,9 @@ website:
search: true
contents:
- text: "Capstone Final Report"
href: final_report.qmd
href: report/final_report.qmd
- text: "Capstone Proposal"
href: proposal.qmd
href: report/proposal.qmd

format:
html:
Expand Down
8 changes: 5 additions & 3 deletions analysis/batch_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@ def main(config_yml):
from_dir = config['repo_base_path']
to_dir = config['response_path']
repos = config['repo']
model = config['model']

record = []
for repo in tqdm(repos):
for run in range(1, runs+1):
action.evaluate(
repo_path=os.path.join(from_dir, repo['path']),
save_to=f"{to_dir}/{repo['name']}_{"{:02d}".format(run)}.json",
checklist_path=checklist_path
save_response_to=f"{to_dir}/{repo['name']}_{"{:02d}".format(run)}.json",
checklist_path=checklist_path,
model=model
)

record.append({
Expand All @@ -43,4 +45,4 @@ def main(config_yml):
with open(f"{to_dir}/record.yml", 'w') as file:
yaml.dump(record, file)

fire.Fire(main)
fire.Fire(main)
5 changes: 3 additions & 2 deletions analysis/batch_run.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
runs: 30
checklist_path: null
repo_base_path: '../data/raw/openja/'
response_path: '../data/processed/batch_run'
model: 'gpt-3.5-turbo'
repo_base_path: 'data/raw/openja/'
response_path: 'data/processed/batch_run_3.5-turbo'
repo:
- name: lightfm
path: './lightfm'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,19 +178,26 @@
"outputs": [],
"source": [
"df_repo_run_file = get_scores_by_repo_by_run_by_file('../../data/processed/batch_run_3.5-turbo/')\n",
"df_repo_run, df_repo__stat, df_repo__count = preprocess(df_repo_run_file)"
"df_repo_run, df_repo__stat, df_repo__count = preprocess(df_repo_run_file)\n",
"\n",
"df_repo_run.to_csv('score_by_repo_run_3.5-turbo.csv', index=False)\n",
"df_repo__stat.to_csv('score_stat_by_repo_3.5-turbo.csv', index=False)\n",
"df_repo__count.to_csv('score_count_by_repo_3.5-turbo.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "31c1ce0b-14e3-4825-aa6e-74dd4d4af960",
"execution_count": 5,
"id": "853ba351-b620-4833-8683-a4e62c7fd9a4",
"metadata": {},
"outputs": [],
"source": [
"df_repo_run.to_csv('score_by_repo_run_3.5-turbo.csv', index=False)\n",
"df_repo__stat.to_csv('score_stat_by_repo_3.5-turbo.csv', index=False)\n",
"df_repo__count.to_csv('score_count_by_repo_3.5-turbo.csv', index=False)"
"df_repo_run_file = get_scores_by_repo_by_run_by_file('../../data/processed/batch_run_4-turbo/')\n",
"df_repo_run, df_repo__stat, df_repo__count = preprocess(df_repo_run_file)\n",
"\n",
"df_repo_run.to_csv('score_by_repo_run_4-turbo.csv', index=False)\n",
"df_repo__stat.to_csv('score_stat_by_repo_4-turbo.csv', index=False)\n",
"df_repo__count.to_csv('score_count_by_repo_4-turbo.csv', index=False)"
]
},
{
Expand All @@ -200,7 +207,7 @@
"metadata": {},
"outputs": [],
"source": [
"df_repo_run_file = get_scores_by_repo_by_run_by_file('../../data/processed/batch_run_4o/')\n",
"df_repo_run_file = get_scores_by_repo_by_run_by_file('../../data/batch_run/batch_run_4o/')\n",
"df_repo_run, df_repo__stat, df_repo__count = preprocess(df_repo_run_file)\n",
"\n",
"df_repo_run.to_csv('score_by_repo_run_4o.csv', index=False)\n",
Expand All @@ -221,16 +228,6 @@
"ground_truth_df.to_csv('ground_truth.csv')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "82d58e97-827a-4a3c-a313-c7623341bdd2",
"metadata": {},
"outputs": [],
"source": [
"#df_repo__count.melt(id_vars=['repo', 'level_1'])"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
Loading

0 comments on commit a28fef1

Please sign in to comment.