-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a collectGithubRepoFiles tool (#8)
* start work on codebase stripping * tools: add github repo lookup method, updates to code stripper * Add a `collectGithubRepoFiles` tool * update codebase.js * remove debug logging
- Loading branch information
1 parent
6364655
commit aa50927
Showing
9 changed files
with
598 additions
and
254 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,135 +1,7 @@ | ||
# Logs | ||
logs | ||
*.log | ||
npm-debug.log* | ||
yarn-debug.log* | ||
yarn-error.log* | ||
lerna-debug.log* | ||
.pnpm-debug.log* | ||
|
||
# Diagnostic reports (https://nodejs.org/api/report.html) | ||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json | ||
|
||
# Runtime data | ||
pids | ||
*.pid | ||
*.seed | ||
*.pid.lock | ||
|
||
# Directory for instrumented libs generated by jscoverage/JSCover | ||
lib-cov | ||
|
||
# Coverage directory used by tools like istanbul | ||
coverage | ||
*.lcov | ||
|
||
# nyc test coverage | ||
.nyc_output | ||
|
||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) | ||
.grunt | ||
|
||
# Bower dependency directory (https://bower.io/) | ||
bower_components | ||
|
||
# node-waf configuration | ||
.lock-wscript | ||
|
||
# Compiled binary addons (https://nodejs.org/api/addons.html) | ||
build/Release | ||
|
||
# Dependency directories | ||
node_modules/ | ||
jspm_packages/ | ||
|
||
# Snowpack dependency directory (https://snowpack.dev/) | ||
web_modules/ | ||
|
||
# TypeScript cache | ||
*.tsbuildinfo | ||
|
||
# Optional npm cache directory | ||
.npm | ||
|
||
# Optional eslint cache | ||
.eslintcache | ||
|
||
# Optional stylelint cache | ||
.stylelintcache | ||
|
||
# Microbundle cache | ||
.rpt2_cache/ | ||
.rts2_cache_cjs/ | ||
.rts2_cache_es/ | ||
.rts2_cache_umd/ | ||
|
||
# Optional REPL history | ||
.node_repl_history | ||
|
||
# Output of 'npm pack' | ||
*.tgz | ||
|
||
# Yarn Integrity file | ||
.yarn-integrity | ||
|
||
# dotenv environment variable files | ||
.env | ||
.env.development.local | ||
.env.test.local | ||
.env.production.local | ||
.env.local | ||
|
||
# parcel-bundler cache (https://parceljs.org/) | ||
.cache | ||
.parcel-cache | ||
|
||
# Next.js build output | ||
.next | ||
out | ||
|
||
# Nuxt.js build / generate output | ||
.nuxt | ||
dist | ||
|
||
# Gatsby files | ||
.cache/ | ||
# Uncomment the public line below if your project uses Gatsby and not Next.js | ||
# https://nextjs.org/blog/next-9-1#public-directory-support | ||
# public | ||
|
||
# vuepress build output | ||
.vuepress/dist | ||
|
||
# vuepress v2.x temp and cache directory | ||
.temp | ||
.cache | ||
|
||
# Docusaurus cache and generated files | ||
.docusaurus | ||
|
||
# Serverless directories | ||
.serverless/ | ||
|
||
# FuseBox cache | ||
.fusebox/ | ||
|
||
# DynamoDB Local files | ||
.dynamodb/ | ||
|
||
# TernJS port file | ||
.tern-port | ||
|
||
# Stores VSCode versions used for testing VSCode extensions | ||
.vscode-test | ||
|
||
# yarn v2 | ||
.yarn/cache | ||
.yarn/unplugged | ||
.yarn/build-state.yml | ||
.yarn/install-state.gz | ||
.pnp.* | ||
playground | ||
node_modules | ||
package-lock.json | ||
*.key | ||
|
||
test/*.html | ||
playground | ||
test/*.html | ||
src/tools/repos | ||
__* | ||
*.key |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,111 +1,7 @@ | ||
const { CompletionService } = require('./CompletionService') | ||
const codebase = require('./tools/codebase') | ||
const viz = require('./tools/viz') | ||
|
||
/**
 * Builds a standalone HTML page that shows a system prompt, a user prompt and
 * one column per model with that model's output.
 *
 * @param {object} data
 * @param {Array<[string, string]>} data.models - `[displayName, modelId]` pairs; `modelId` is
 *   used as the suffix of the `<pre>` element id and as the key into `data.outputs`.
 * @param {Object<string, string>} data.outputs - Model output text keyed by `modelId`.
 * @param {string} data.system - System prompt text.
 * @param {string} data.user - User prompt text.
 * @returns {string} The full HTML document.
 */
function makeVizHtml (data) {
  // Escape text for use in HTML element content and double-quoted attribute values.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')

  // Model names/ids may be arbitrary caller-supplied strings, so they are escaped
  // before being placed in markup. The browser decodes the id attribute, so the
  // getElementById lookup in the page script still matches the raw modelId.
  const modelColumns = data.models.map(([modelName, modelId]) =>
    `<div><h3>${escapeHtml(modelName)}</h3><pre id="presp${escapeHtml(modelId)}">MODEL OUTPUT</pre></div>`).join('\n')

  // The data is inlined into a <script> element. Prompts and model outputs can
  // contain "</script>" or "<!--", which would terminate or corrupt the script,
  // so every "<" is emitted as the equivalent JSON/JS escape \u003c.
  const embeddedData = JSON.stringify(data).replace(/</g, '\\u003c')

  return `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM Output Viz</title>
<style>
pre {
max-width: 25vw;
overflow: auto;
}
h3 {
background-color: lightcyan;
margin-top: 12px;
}
#grid {
/* margin: 1%; */
.correct h3 {
background-color: lightgreen;
}
}
#grid div {
margin-right: 2%;
}
</style>
</head>
<body>
<button id="wordwrap">Toggle Word Wrap</button>
<div id="grid" style='display: flex; flex-direction: row;'>
<div>
<h3>System Prompt</h3>
<pre id="psys">SYS PROMPT</pre>
</div>
<div>
<h3>User Prompt</h3>
<pre id="pusr">USR PROMPT</pre>
</div>
${modelColumns}
</div>
<script>
function toggleWordWrap () {
const $pre = document.querySelectorAll('pre');
for (const $p of $pre) {
$p.style.whiteSpace = $p.style.whiteSpace === 'pre-wrap' ? 'pre' : 'pre-wrap';
}
}
wordwrap.onclick = toggleWordWrap;
toggleWordWrap();
const $psys = document.getElementById('psys');
const $pusr = document.getElementById('pusr');
const data = ${embeddedData};
const outputs = data.outputs;
if ($psys) $psys.textContent = data.system;
if ($pusr) $pusr.textContent = data.user;
for (const [modelName, modelId] of data.models) {
const $presp = document.getElementById('presp' + modelId);
if ($presp) $presp.textContent = outputs[modelId];
}
</script>
</body>
</html>
`
}
// Public API: re-exports makeVizForPrompt from ./tools/viz and
// collectGithubRepoFiles from ./tools/codebase.
module.exports = { | ||
makeVizForPrompt: viz.makeVizForPrompt, | ||
collectGithubRepoFiles: codebase.collectGithubRepoFiles | ||
} | ||
|
||
/**
 * Requests a completion from each model for the same system/user prompt pair
 * and renders the results as an HTML page via makeVizHtml.
 *
 * Models are queried one after another, in the order given.
 *
 * @param {string} system - System prompt sent to every model.
 * @param {string} user - User prompt sent to every model.
 * @param {string[]} models - Model identifiers passed to CompletionService#requestCompletion.
 * @returns {Promise<string>} Whatever makeVizHtml returns for the collected data.
 */
async function makeVizForPrompt (system, user, models) {
  // Known model identifier -> [display name, short id]. The short id is used as
  // the key into data.outputs. Unknown models fall back to using the raw
  // identifier for both the display name and the short id.
  const knownModels = new Map([
    ['gpt-3.5-turbo-16k', ['GPT-3.5 Turbo 16k', '3516turbo']],
    ['gpt-3.5-turbo', ['GPT-3.5 Turbo', '35turbo']],
    ['gpt-4', ['GPT-4', 'gpt4']],
    ['gpt-4-turbo-preview', ['GPT-4 Turbo Preview', 'gpt4turbo']],
    ['gemini-1.0-pro', ['Gemini 1.0 Pro', 'gemini']]
  ])

  const service = new CompletionService()
  const data = { models: [], outputs: {} }
  for (const model of models) {
    const { text } = await service.requestCompletion(model, system, user)
    const [displayName, shortId] = knownModels.get(model) || [model, model]
    data.models.push([displayName, shortId])
    data.outputs[shortId] = text
  }
  data.system = system
  data.user = user
  return makeVizHtml(data)
}
|
||
// Exports makeVizForPrompt as this module's only export.
module.exports = { makeVizForPrompt } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
const fs = require('fs') | ||
const cp = require('child_process') | ||
const { join } = require('path') | ||
|
||
/**
 * Normalizes a path to use forward slashes.
 *
 * @param {string} path - Path that may contain backslash separators.
 * @returns {string} The path with every `\` replaced by `/`.
 */
function fixSeparator (path) {
  return path.replace(/\\/g, '/')
}
|
||
/**
 * Recursively lists every non-directory entry below `folder`.
 *
 * @param {string} folder - Directory to walk.
 * @returns {string[]} Paths (built by joining `folder` with each entry name) of all
 *   non-directory entries, with separators normalized by fixSeparator. Entries of a
 *   directory appear in readdir order, with a subdirectory's contents listed in place.
 */
function getAllFilesIn (folder) {
  const files = []
  // A single accumulator is filled by the walker instead of spreading each
  // subtree's result into push(): spreading a very large array as call
  // arguments can throw a RangeError, and it also meant every level
  // re-normalized paths that deeper levels had already normalized.
  const walk = (dir) => {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const fullPath = join(dir, entry.name)
      if (entry.isDirectory()) {
        walk(fullPath)
      } else {
        files.push(fullPath)
      }
    }
  }
  walk(folder)
  return files.map(fixSeparator)
}
|
||
/**
 * Clones a GitHub repository into a `repos` folder next to this file (or reuses
 * an existing clone), updates it to the requested branch, and returns the
 * contents of the files that pass the given filters.
 *
 * @param {string} repo - Repository in `owner/name` form.
 * @param {object} [options]
 * @param {string} [options.extension] - Only keep files whose path ends with this string.
 * @param {string} [options.branch='master'] - Branch to check out and pull.
 * @param {Function|Array<RegExp|string>|RegExp|string} [options.matching] - Either a predicate
 *   called with the repo-relative path, or one or more patterns of which at least
 *   one must match the repo-relative path.
 * @returns {Array<[string, string, string]>} One `[absolutePath, relativePath, contents]`
 *   triple per kept file. `relativePath` is the absolute path with the clone
 *   directory prefix removed (so it starts with `/`), and `contents` is the
 *   file read as UTF-8 and trimmed.
 * @throws {TypeError} If `repo` or the branch name is not a plain, safe identifier.
 */
function collectGithubRepoFiles (repo, options = {}) {
  const opts = options || {}
  // Validate before anything reaches git: both values end up on a command line.
  if (typeof repo !== 'string' || !/^[\w.-]+\/[\w.-]+$/.test(repo) || repo.startsWith('-')) {
    throw new TypeError(`Invalid GitHub repo "${repo}", expected "owner/name"`)
  }
  const extension = opts.extension
  const branch = opts.branch || 'master'
  if (typeof branch !== 'string' || !/^[\w./-]+$/.test(branch) || branch.startsWith('-')) {
    throw new TypeError(`Invalid branch name "${branch}"`)
  }

  // Arguments are passed as an array (no shell), so nothing in repo/branch can
  // be interpreted as shell syntax.
  const git = (args, cwd) => cp.execFileSync('git', args, { cwd })

  // First, make sure the repo is cloned inside a "repos" folder in this directory
  const safeName = repo.replace(/\//g, ',')
  const reposDir = join(__dirname, 'repos')
  const repoPath = join(reposDir, safeName)
  fs.mkdirSync(reposDir, { recursive: true })
  if (!fs.existsSync(repoPath)) {
    git(['clone', '--', `https://github.com/${repo}.git`, safeName], reposDir)
  }
  // Fetch first so the branch is known even if it was created after the clone,
  // then switch to it BEFORE pulling: pulling first would merge the requested
  // branch into whichever branch happened to be checked out.
  git(['fetch', 'origin', branch], repoPath)
  git(['checkout', branch], repoPath)
  git(['pull', 'origin', branch], repoPath)

  // Now collect all the files inside repoPath, paired with their repo-relative path
  const prefix = fixSeparator(repoPath)
  const allFiles = getAllFilesIn(repoPath)
    .map(f => [f, f.startsWith(prefix) ? f.slice(prefix.length) : f])
    // Git's own metadata is not part of the repository's files
    .filter(([, relFile]) => !relFile.startsWith('/.git/'))

  // A single pattern is accepted as shorthand for a one-element list
  const matching = opts.matching
  const patterns = (matching && typeof matching !== 'function' && !Array.isArray(matching))
    ? [matching]
    : matching

  // Now figure out the relevant files
  const relevantFiles = allFiles.filter(([file, relFile]) => {
    if (extension && !file.endsWith(extension)) return false
    if (!matching) return true
    if (typeof matching === 'function') return Boolean(matching(relFile))
    return patterns.some(m => relFile.match(m))
  })

  return relevantFiles.map(([abs, rel]) => [abs, rel, fs.readFileSync(abs, 'utf8').trim()])
}
|
||
// Exports collectGithubRepoFiles as this module's only export.
module.exports = { collectGithubRepoFiles } |
Oops, something went wrong.