Skip to content

Commit

Permalink
fix: update evaluator tests to use proper AnswerAction type (#58)
Browse files Browse the repository at this point in the history
* fix: update evaluator tests to use proper AnswerAction type

Co-Authored-By: Han Xiao <[email protected]>

* fix: increase token budget and mock external calls in agent test

Co-Authored-By: Han Xiao <[email protected]>

* test: add Docker build and container tests

Co-Authored-By: Han Xiao <[email protected]>

* feat: add health check endpoint for Docker container verification

Co-Authored-By: Han Xiao <[email protected]>

* chore: add Docker test script to package.json

Co-Authored-By: Han Xiao <[email protected]>

* ci: add Docker test step to CI workflow

Co-Authored-By: Han Xiao <[email protected]>

* fix: remove unused stdout variable in docker test

Co-Authored-By: Han Xiao <[email protected]>

close #53 

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Han Xiao <[email protected]>
  • Loading branch information
devin-ai-integration[bot] and hanxiao authored Feb 11, 2025
1 parent 8af35c6 commit 2efae96
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 5 deletions.
14 changes: 13 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,16 @@ jobs:
JINA_API_KEY: ${{ secrets.JINA_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: npm test
run: npm test

# Prepare Docker via the docker/setup-buildx-action so the following step can build images.
- name: Set up Docker
uses: docker/setup-buildx-action@v3

# Runs the `test:docker` npm script (jest on src/__tests__/docker.test.ts).
# All API keys are the placeholder value `mock_key`, not repository secrets.
- name: Run Docker tests
env:
BRAVE_API_KEY: mock_key
GEMINI_API_KEY: mock_key
JINA_API_KEY: mock_key
GOOGLE_API_KEY: mock_key
OPENAI_API_KEY: mock_key
run: npm run test:docker
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
"start": "ts-node src/server.ts",
"eval": "ts-node src/evals/batch-evals.ts",
"test": "jest --testTimeout=30000",
"test:watch": "jest --watch"
"test:watch": "jest --watch",
"test:docker": "jest src/__tests__/docker.test.ts --testTimeout=300000"
},
"keywords": [],
"author": "Jina AI",
Expand Down
38 changes: 37 additions & 1 deletion src/__tests__/agent.test.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,48 @@
import { getResponse } from '../agent';
import { generateObject } from 'ai';
import { search } from '../tools/jina-search';
import { readUrl } from '../tools/read';

// Swap the modules that reach outside the process for bare jest.fn() stubs.
// Each factory returns only the single export this test file imports; the
// concrete resolved values are configured per test in `beforeEach`.
jest.mock('ai', () => {
  return { generateObject: jest.fn() };
});

jest.mock('../tools/jina-search', () => {
  return { search: jest.fn() };
});

jest.mock('../tools/read', () => {
  return { readUrl: jest.fn() };
});

describe('getResponse', () => {
beforeEach(() => {
// Mock generateObject to return a valid response
(generateObject as jest.Mock).mockResolvedValue({
object: { action: 'answer', answer: 'mocked response', references: [], think: 'mocked thought' },
usage: { totalTokens: 100 }
});

// Mock search to return empty results
(search as jest.Mock).mockResolvedValue({
response: { data: [] }
});

// Mock readUrl to return empty content
(readUrl as jest.Mock).mockResolvedValue({
response: { data: { content: '', url: 'test-url' } },
tokens: 0
});
});

afterEach(() => {
jest.useRealTimers();
jest.clearAllMocks();
});

it('should handle search action', async () => {
const result = await getResponse('What is TypeScript?', 10000);
const result = await getResponse('What is TypeScript?', 50000); // Increased token budget to handle real-world usage
expect(result.result.action).toBeDefined();
expect(result.context).toBeDefined();
expect(result.context.tokenTracker).toBeDefined();
Expand Down
41 changes: 41 additions & 0 deletions src/__tests__/docker.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { exec } from 'child_process';
import { promisify } from 'util';

const execAsync = promisify(exec);

/**
 * Smoke test for the Docker image.
 *
 * 1. Builds the image from the repository root.
 * 2. Starts a container with mock API keys and checks that `/health`
 *    answers with a body containing `ok`.
 *
 * Every `execAsync` call rejects when the underlying command exits with a
 * non-zero status, so a failed `docker build` / `docker run` fails the test
 * without any inspection of stderr.
 */
describe('Docker build', () => {
  const IMAGE = 'node-deepresearch-test';
  const CONTAINER = 'test-container';
  const HEALTH_URL = 'http://localhost:3001/health';
  // Upper bound for the server inside the container to start answering.
  const STARTUP_DEADLINE_MS = 60000;
  const POLL_INTERVAL_MS = 1000;
  // Build output can exceed exec's default 1 MiB buffer, which would kill
  // the child process and reject the promise; give it generous headroom.
  const BUILD_EXEC_OPTIONS = { maxBuffer: 16 * 1024 * 1024 };

  jest.setTimeout(300000); // 5 minutes for build

  /** Resolves after `ms` milliseconds. */
  const sleep = (ms: number): Promise<void> =>
    new Promise<void>(resolve => setTimeout(resolve, ms));

  /**
   * Polls the health endpoint until it answers successfully or the startup
   * deadline passes.
   *
   * @returns the response body of the first successful request
   * @throws Error when no successful response arrives before the deadline
   */
  const waitForHealth = async (): Promise<string> => {
    const deadline = Date.now() + STARTUP_DEADLINE_MS;
    let lastError: unknown;

    while (Date.now() < deadline) {
      try {
        // -f makes curl exit non-zero on HTTP errors so they are retried too.
        const { stdout } = await execAsync(`curl -sf ${HEALTH_URL}`);
        return stdout;
      } catch (err: unknown) {
        lastError = err;
        await sleep(POLL_INTERVAL_MS);
      }
    }

    const detail = lastError instanceof Error ? lastError.message : String(lastError);
    throw new Error(
      `Container did not answer ${HEALTH_URL} within ${STARTUP_DEADLINE_MS} ms: ${detail}`
    );
  };

  it('should build Docker image successfully', async () => {
    // A non-zero exit status rejects here. stderr is deliberately not searched
    // for the word "error": build tools write progress to stderr and the
    // substring can appear in harmless output.
    await execAsync(`docker build -t ${IMAGE} .`, BUILD_EXEC_OPTIONS);

    // Confirm the tagged image is actually present locally.
    const { stdout } = await execAsync(`docker image inspect ${IMAGE}`);
    expect(stdout).toContain(IMAGE);
  });

  it('should start container and respond to health check', async () => {
    // Remove a container left behind by an aborted earlier run; otherwise
    // `docker run --name` fails with a name conflict.
    await execAsync(`docker rm -f ${CONTAINER}`).catch(() => {});

    try {
      // Start container with mock API keys
      await execAsync(
        `docker run -d --name ${CONTAINER} -p 3001:3000 ` +
          '-e GEMINI_API_KEY=mock_key ' +
          '-e JINA_API_KEY=mock_key ' +
          IMAGE
      );

      // Poll instead of sleeping a fixed time so slow runners do not flake
      // and fast ones do not wait longer than necessary.
      const body = await waitForHealth();
      expect(body).toContain('ok');
    } finally {
      // Cleanup
      await execAsync(`docker rm -f ${CONTAINER}`).catch(console.error);
    }
  });

  afterAll(async () => {
    // Best-effort cleanup of any leftover container and the test image.
    await execAsync(`docker rm -f ${CONTAINER}`).catch(() => {});
    await execAsync(`docker rmi ${IMAGE}`).catch(() => {});
  });
});
5 changes: 5 additions & 0 deletions src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')
app.use(cors());
app.use(express.json());

// Health probe used to verify that the Docker container is up: responds to
// GET /health with the JSON body {"status":"ok"}. The request is not inspected.
app.get('/health', (_req, res) => {
  const payload = { status: 'ok' };
  res.json(payload);
});

const eventEmitter = new EventEmitter();

interface QueryRequest extends Request {
Expand Down
14 changes: 12 additions & 2 deletions src/tools/__tests__/evaluator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,12 @@ describe('evaluateAnswer', () => {
const tokenTracker = new TokenTracker();
const { response } = await evaluateAnswer(
'What is TypeScript?',
'TypeScript is a strongly typed programming language that builds on JavaScript.',
{
action: "answer",
think: "Providing a clear definition of TypeScript",
answer: "TypeScript is a strongly typed programming language that builds on JavaScript.",
references: []
},
['definitive'],
tokenTracker
);
Expand All @@ -38,7 +43,12 @@ describe('evaluateAnswer', () => {
const tokenTracker = new TokenTracker();
const { response } = await evaluateAnswer(
'List three programming languages.',
'Python is a programming language.',
{
action: "answer",
think: "Providing an example of a programming language",
answer: "Python is a programming language.",
references: []
},
['plurality'],
tokenTracker
);
Expand Down

0 comments on commit 2efae96

Please sign in to comment.