Skip to content

Commit

Permalink
AE init
Browse files Browse the repository at this point in the history
  • Loading branch information
chyanju committed Mar 5, 2022
1 parent e34005b commit 92d50bd
Show file tree
Hide file tree
Showing 2,751 changed files with 1,553,159 additions and 3 deletions.
88 changes: 88 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
.ipynb_checkpoints/
tmp/

# npm related
node_modules/
package-lock.json
package.json

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# PyBuilder
target/

# pyenv
.python-version

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Sphinx
_build/

# Pyre type checker
.pyre/

# Miscellaneous
.DS_Store
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2022 Yanju Chen
Copyright (c) 2021 Yanju Chen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
266 changes: 264 additions & 2 deletions README.md

Large diffs are not rendered by default.

226 changes: 226 additions & 0 deletions analyze-results-Poe.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9f23ee9c-04a4-446c-8d35-a3a18679b55c",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"import os\n",
"import re\n",
"from pathlib import Path\n",
"\n",
"from trinity.utils.visqa import parse_value"
]
},
{
"cell_type": "markdown",
"id": "55695ae7-218a-42e7-883d-ed20b5269e05",
"metadata": {},
"source": [
"### Change the `result_dir` to the corresponding folder that you want to analyze"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d12ec9f1-ff02-4d12-b7a2-6cdf34fe748d",
"metadata": {},
"outputs": [],
"source": [
"result_dir, path_plate = \"./logs/full-1119/\", \"{}/{}_full.log\"\n",
"# result_dir, path_plate = \"./logs/abstract-only-1119/\", \"{}/{}_abstract-only.log\"\n",
"# result_dir, path_plate = \"./logs/optimal-only-1119/\", \"{}/{}_optimal-only.log\"\n",
"\n",
"data_path = \"./benchmarks/VisQA/shared/\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa395e05-a22f-4828-b059-539a97835a21",
"metadata": {},
"outputs": [],
"source": [
"with open(\"{}/tapas_on_visqa_dataset.pkl\".format(data_path), \"rb\") as f:\n",
" dt = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9bfdb3c-95ba-457b-862e-c526e1a31b06",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"# for every benchmark, search for logs\n",
"collected_answers = []\n",
"expected_answers = []\n",
"n_timeout = 0\n",
"for i in range(len(dt)):\n",
" tmp_id = dt[i][\"short_id\"]\n",
" expected_answers.append(dt[i][\"repr_answer\"])\n",
" tmp_log_path = path_plate.format(result_dir, tmp_id)\n",
" # test file existence\n",
" tmp_pflag = os.path.exists(tmp_log_path)\n",
" if tmp_pflag:\n",
" # found, look for review report ranking list\n",
" # read logs\n",
" with open(tmp_log_path, \"r\") as f:\n",
" tmp_log = f.read()\n",
" \n",
" if \"---------- timeout ----------\" in tmp_log:\n",
" n_timeout += 1\n",
" print(\"# i={}, {}, timeout detected\".format(i, dt[i][\"short_id\"]))\n",
" \n",
" # first detect for fallback strategy (original)\n",
" if \"# ====> fallback strategy (original) is triggered\" in tmp_log:\n",
" # if True:\n",
" # collected_answers.append([(-1,dt[i][\"repr_answer\"])])\n",
" collected_answers.append([(-1,[dt[i][\"candidate_outputs\"][\"TaPas_original\"][0]])])\n",
" # collected_answers.append(None)\n",
" pass\n",
" else:\n",
" tmp0_res = sorted(\n",
" re.findall(r\"# top-(.*?), score: (.*?), answer: (.*)\", tmp_log),\n",
" key=lambda x:x[1],\n",
" reverse=True,\n",
" )\n",
"\n",
" # ========\n",
" # FIXME: for new version of logs, you need to evaluate it to get the repr_answer\n",
" # ========\n",
"\n",
" # sort out and remove duplicates\n",
" tmp1_res = []\n",
" tmp1_res_set = set()\n",
" for p in tmp0_res:\n",
" if p[2] not in tmp1_res_set:\n",
" # don't include the same answer twice\n",
" # tmp1_res_set.add(p[2])\n",
" # tmp1_res.append((p[1],[p[2]])) # (top score, repr_answer)\n",
" tmp1_res_set.add(p[2])\n",
" tmp1_res.append((p[1],eval(p[2]))) # (top score, repr_answer)\n",
" collected_answers.append(tmp1_res)\n",
" else:\n",
" # probably not ready, put None\n",
" collected_answers.append(None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82bb3090-9115-4d2a-9949-6557316f0642",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"# compute top-1 accuracy\n",
"# ========\n",
"# FIXME: for new version of logs, you need new methods to automatically compute this\n",
"# ========\n",
"def parse_list(arg_list):\n",
" Q = [parse_value(str(p).lower()) for p in arg_list]\n",
" return [str(p) if isinstance(p,str) else \"{:.4f}\".format(p) for p in Q]\n",
"\n",
"N_TOP = 1\n",
"acc = 0\n",
"tot = 0\n",
"for i in range(len(dt)):\n",
" if collected_answers[i] is None:\n",
" print(\"i={}, -1\".format(i))\n",
" # not ready\n",
" continue\n",
" \n",
" tot += 1\n",
" proposed_answers = [p[1] for p in collected_answers[i][:N_TOP]]\n",
" proposed_scores = [p[0] for p in collected_answers[i][:N_TOP]]\n",
" is_found = False\n",
" for j in range(len(proposed_answers)):\n",
" p = proposed_answers[j]\n",
" s = proposed_scores[j]\n",
"# pp = parse_list(p)\n",
"# pe = parse_list(expected_answers[i])\n",
"# print(\"i={}, pp={}, pe={}\".format(i,pp,pe))\n",
" if parse_list(p) == parse_list(expected_answers[i]):\n",
" acc += 1\n",
" is_found = True\n",
"# if s==-1:\n",
"# print(\"# i={}, {}, hit, original fallback\".format(i, dt[i][\"short_id\"]))\n",
"# else:\n",
"# print(\"# i={}, {}, hit\".format(i, dt[i][\"short_id\"]))\n",
" break\n",
" if is_found:\n",
" print(\"1\")\n",
" else:\n",
" print(\"0\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbb6e6b4-978b-40ad-aca3-e26fc9ccc6d1",
"metadata": {},
"outputs": [],
"source": [
"acc, tot"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c24fd2c1-f756-4332-9fa8-636260596687",
"metadata": {},
"outputs": [],
"source": [
"acc/tot"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8aca07e1-a2d9-4f76-856c-04f959842f3f",
"metadata": {},
"outputs": [],
"source": [
"n_timeout"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0722c664-15d7-4ac9-a9a5-c29245371444",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "base"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 92d50bd

Please sign in to comment.