From 651ad593c76f9872e1b5e09c92ab185d4fc02049 Mon Sep 17 00:00:00 2001 From: Charles Teague Date: Fri, 7 Mar 2025 13:05:28 -0500 Subject: [PATCH] Improve startup performance task detection - Make the cache a persistent workspace cache - Improve the perf by working more asynchronously - Add log output to startup channel to track perf --- tools/vscode/CHANGELOG.md | 4 + tools/vscode/package.json | 2 +- .../workspace/workspace-task-provider.ts | 111 ++++++++++++------ 3 files changed, 80 insertions(+), 37 deletions(-) diff --git a/tools/vscode/CHANGELOG.md b/tools/vscode/CHANGELOG.md index 649d6a519..89d39205a 100644 --- a/tools/vscode/CHANGELOG.md +++ b/tools/vscode/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.3.56 + +- Improve startup performance and task detection performance + ## 0.3.55 - Remove display of status icons in log listing (it causes performance issues). diff --git a/tools/vscode/package.json b/tools/vscode/package.json index 1c240997b..f207a36fa 100644 --- a/tools/vscode/package.json +++ b/tools/vscode/package.json @@ -7,7 +7,7 @@ "author": { "name": "UK AI Security Institute" }, - "version": "0.3.55", + "version": "0.3.56", "license": "MIT", "homepage": "https://inspect.ai-safety-institute.org.uk/", "repository": { diff --git a/tools/vscode/src/providers/workspace/workspace-task-provider.ts b/tools/vscode/src/providers/workspace/workspace-task-provider.ts index 33d1d3a88..5c029d8d2 100644 --- a/tools/vscode/src/providers/workspace/workspace-task-provider.ts +++ b/tools/vscode/src/providers/workspace/workspace-task-provider.ts @@ -1,15 +1,16 @@ import path, { join, relative } from "path"; import { AbsolutePath, activeWorkspacePath } from "../../core/path"; -import { Event, EventEmitter, ExtensionContext, FileSystemWatcher, workspace } from "vscode"; +import { Event, EventEmitter, ExtensionContext, FileStat, FileSystemWatcher, Uri, workspace } from "vscode"; import { throttle } from "lodash"; import { InspectChangedEvent, InspectManager } from "../inspect/inspect-manager"; +import { startup } from "../../core/log"; // Activates the provider which tracks the currently active task (document and task name) export function activateWorkspaceTaskProvider(inspectManager: InspectManager, context: ExtensionContext) { // The task manager - const taskManager = new WorkspaceTaskManager(); + const taskManager = new WorkspaceTaskManager(context); // If the interpreter changes, refresh the tasks context.subscriptions.push(inspectManager.onInspectChanged(async (e: InspectChangedEvent) => { @@ -39,13 +40,14 @@ const kTaskFilePattern = "**/*.{py,ipynb}"; // Tracks what the active task is in the editor export class WorkspaceTaskManager { - constructor() { + constructor(context: ExtensionContext) { this.watcher = workspace.createFileSystemWatcher( kTaskFilePattern, false, false, false ); + this.context = context; const onChange = throttle(async () => { await this.refresh(); }, 5000, { leading: true, trailing: true }); @@ -56,12 +58,13 @@ export class WorkspaceTaskManager { private tasks_: TaskPath[] = []; private watcher: FileSystemWatcher; + private context: ExtensionContext; public async refresh() { const workspace = activeWorkspacePath(); try { - const tasks = await inspectTaskData(workspace); + const tasks = await inspectTaskData(this.context, workspace); this.setTasks(tasks); } catch (err: unknown) { console.log("Unable to read inspect task data."); @@ -100,53 +103,89 @@ const kTaskRegex = /@task/; const kTaskNameRegex = /^[ \t]*@task(?:\([^)]*\))?[ \t]*\r?\n[ \t]*def\s+([A-Za-z_]\w*)\s*\(/gm; const kExcludeGlob = '**/{.venv,venv,__pycache__,.git,node_modules,env,envs,conda-env,.tox,.pytest_cache,.mypy_cache,.idea,.vscode,build,dist,.eggs,*.egg-info,.ipynb_checkpoints}/**'; -// Cache tasks cache (caches task info per file) -const taskFileCache: Record = {}; +interface TaskCache { + [filePath: string]: { updated: number; descriptors: TaskDescriptor[] }; +} + +async function workspaceTasks( + context: ExtensionContext, + workspacePath: AbsolutePath +): Promise { + const start = Date.now(); + const files = await workspace.findFiles('**/*.py', kExcludeGlob); + + // Filter files with skip prefixed + const validFiles = files.filter((file) => { + const relativePath = relative(workspacePath.path, file.fsPath); + return !relativePath.startsWith('_') && !relativePath.startsWith('.'); + }); + + // Load the cache + const taskFileCache = context.workspaceState.get('taskFileCache2', {}); -async function workspaceTasks(workspacePath: AbsolutePath): Promise { const tasks: TaskDescriptor[] = []; - const files = await workspace.findFiles('**/*.py', kExcludeGlob); - for (const file of files) { + // Try to cached file data, if possible + const fileStatPromises = validFiles.map(async (file) => { + const filePath = file.toString(); const stat = await workspace.fs.stat(file); - const cached = taskFileCache[file.toString()]; + const cached = taskFileCache[filePath]; - // Check to see if we can use the cached version of the result if (cached && cached.updated === stat.mtime) { tasks.push(...cached.descriptors); + return null; } else { - // We can't use a cached version of the result - const fileData = await workspace.fs.readFile(file); - const fileContent = new TextDecoder().decode(fileData); - const fileTasks = []; - if (kTaskRegex.test(fileContent)) { - // Extract the task name - const matches = fileContent.matchAll(kTaskNameRegex); - for (const match of matches) { - if (match[1]) { - const taskName = match[1]; - const taskFile = relative(workspacePath.path, file.fsPath); - if (!taskFile.startsWith("_") && !taskFile.startsWith(".")) { - fileTasks.push({ - file: taskFile, - name: taskName - }); - } - } + return { file, filePath, stat }; + } + }); + + startup.info(`Filtering ${validFiles.length} files (using cache)`); + + // Resolve file stats and filter out cached results + const filesToProcess = (await Promise.all(fileStatPromises)).filter( + (file) => file !== null + ) as { file: Uri; filePath: string; stat: FileStat }[]; + + startup.info(`Inspecting ${filesToProcess.length} files for tasks`); + + // Read files and process tasks concurrently + const fileTasksPromises = filesToProcess.map(async ({ file, filePath, stat }) => { + const fileData = await workspace.fs.readFile(file); + const fileContent = new TextDecoder().decode(fileData); + + + const fileTasks: TaskDescriptor[] = []; + const taskFile = relative(workspacePath.path, file.fsPath); + if (kTaskRegex.test(fileContent)) { + + for (const match of fileContent.matchAll(kTaskNameRegex)) { + if (match[1]) { + fileTasks.push({ file: taskFile, name: match[1] }); } - // Add the tasks to the cache - taskFileCache[file.toString()] = { updated: stat.mtime, descriptors: fileTasks }; - tasks.push(...fileTasks); } } - } + + + // Update cache in memory + taskFileCache[filePath] = { updated: stat.mtime, descriptors: fileTasks }; + return fileTasks; + }); + + // Await all task collection and add to the tasks array + const newTasks = (await Promise.all(fileTasksPromises)).flat(); + tasks.push(...newTasks); + + startup.info(`Found ${tasks.length} tasks in ${Date.now() - start}ms`); + + // Batch update the cache once at the end + await context.workspaceState.update('taskFileCache2', taskFileCache); + return tasks; } - -async function inspectTaskData(folder: AbsolutePath) { +async function inspectTaskData(context: ExtensionContext, folder: AbsolutePath) { // Read the list of tasks - const taskDescriptors = await workspaceTasks(folder); + const taskDescriptors = await workspaceTasks(context, folder); // Keep a map so we can quickly look up parents const treeMap: Map = new Map();