From e109cac8ecff89a627a78bc2fe6aafda6c5941be Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Fri, 5 Jul 2024 17:02:36 -0700 Subject: [PATCH 1/3] work in progress --- lib/fetch.js | 39 +++++-- package.json | 1 + providers/fetch/debianFetch.js | 6 +- providers/fetch/mavenBasedFetch.js | 11 +- test/unit/lib/fetchTests.js | 164 +++++++++++++++++++---------- 5 files changed, 149 insertions(+), 72 deletions(-) diff --git a/lib/fetch.js b/lib/fetch.js index 280fab9c..0b08836e 100644 --- a/lib/fetch.js +++ b/lib/fetch.js @@ -3,19 +3,38 @@ const axios = require('axios') -async function callFetch(request) { +function buildRequestOptions(request) { + let responseType = 'text' + if (request.json) { + responseType = 'json' + } else if (request.encode === null) { + responseType = 'stream' + } + + return { + method: request.method, + url: request.url, + responseType, + headers: request.headers, + data: request.body + } +} + +async function callFetch(request, axiosInstance = axios) { try { - const response = await axios({ - method: request.method, - url: request.url, - responseType: request.responseType, - headers: request.headers, - data: request.body - }) + const options = buildRequestOptions(request) + const response = await axiosInstance(options) if (request.resolveWithFullResponse) return response return response.data } catch (error) { - return error.response + if (error.response && request.resolveWithFullResponse) return error.response + throw error } } -module.exports = { callFetch } + +function withDefaults(opts) { + const axiosInstance = axios.create(opts) + return request => callFetch(request, axiosInstance) +} + +module.exports = { callFetch, withDefaults } diff --git a/package.json b/package.json index 9fdf66b7..91017017 100644 --- a/package.json +++ b/package.json @@ -88,6 +88,7 @@ "eslint": "^8.56.0", "eslint-config-prettier": "^9.1.0", "mocha": "^8.2.0", + "mockttp": "^3.13.0", "nyc": "^15.0.0", "prettier": "3.2.4", "proxyquire": "^2.1.3", diff --git a/providers/fetch/debianFetch.js b/providers/fetch/debianFetch.js index eedc3be9..76e660cf 100644 --- a/providers/fetch/debianFetch.js +++ b/providers/fetch/debianFetch.js @@ -11,10 +11,10 @@ const memCache = require('memory-cache') const nodeRequest = require('request') const path = require('path') const { promisify } = require('util') +const { callFetch: requestPromise } = require('../../lib/fetch') const tmp = require('tmp') const unixArchive = require('ar-async') const FetchResult = require('../../lib/fetchResult') -const { callFetch } = require('../../lib/fetch') const exec = promisify(require('child_process').exec) const exists = promisify(fs.exists) @@ -79,7 +79,7 @@ class DebianFetch extends AbstractFetch { // Example: https://sources.debian.org/api/src/amoeba/latest async _getLatestVersion(spec) { const url = `https://sources.debian.org/api/src/${spec.name}/latest` - const response = await callFetch({ url, responseType: 'json' }) + const response = await requestPromise({ url, json: true }) return response.version } @@ -314,7 +314,7 @@ class DebianFetch extends AbstractFetch { if (!copyrightUrl) return [] let response = '' try { - response = await callFetch({ url: copyrightUrl, responseType: 'text' }) + response = await requestPromise({ url: copyrightUrl, json: false }) } catch (error) { if (error.statusCode === 404) return [] else throw error diff --git a/providers/fetch/mavenBasedFetch.js b/providers/fetch/mavenBasedFetch.js index eaf8d2bf..36500455 100644 --- a/providers/fetch/mavenBasedFetch.js +++ b/providers/fetch/mavenBasedFetch.js @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MIT const AbstractFetch = require('./abstractFetch') +const { withDefaults } = require('../../lib/fetch') const nodeRequest = require('request') const { clone, get } = require('lodash') const { promisify } = require('util') @@ -14,7 +15,6 @@ const parseString = promisify(require('xml2js').parseString) const EntitySpec = require('../../lib/entitySpec') const { extractDate } = require('../../lib/utils') const FetchResult = require('../../lib/fetchResult') -const { callFetch } = require('../../lib/fetch') const extensionMap = { sourcesJar: '-sources.jar', @@ -29,7 +29,7 @@ class MavenBasedFetch extends AbstractFetch { constructor(providerMap, options) { super(options) this._providerMap = { ...providerMap } - this._handleCallFetch = options.callFetch || callFetch + this._handleRequestPromise = options.requestPromise || withDefaults(defaultHeaders) this._handleRequestStream = options.requestStream || nodeRequest.defaults(defaultHeaders).get } @@ -71,7 +71,7 @@ class MavenBasedFetch extends AbstractFetch { //Use Maven repository meta data model to get the latest version //https://maven.apache.org/ref/3.2.5/maven-repository-metadata/repository-metadata.html#class_versioning const url = `${this._buildBaseUrl(spec)}/maven-metadata.xml` - const response = await this._requestPromise({ url, responseType: 'text' }) + const response = await this._requestPromise({ url, json: false }) if (!response) return null const meta = await parseString(response) return get(meta, 'metadata.versioning[0].release[0]') @@ -115,7 +115,7 @@ class MavenBasedFetch extends AbstractFetch { async _getPom(spec) { const url = this._buildUrl(spec, extensionMap.pom) - const content = await this._requestPromise({ url, responseType: 'text' }) + const content = await this._requestPromise({ url, json: false }) if (!content) return null const pom = await parseString(content) // clean up some stuff we don't actually look at. @@ -182,8 +182,7 @@ class MavenBasedFetch extends AbstractFetch { async _requestPromise(options) { try { - options.headers = { ...defaultHeaders.headers, ...options.headers } - return await this._handleCallFetch(options) + return await this._handleRequestPromise(options) } catch (error) { if (error.statusCode === 404) return null else throw error diff --git a/test/unit/lib/fetchTests.js b/test/unit/lib/fetchTests.js index f3738686..0ed110f9 100644 --- a/test/unit/lib/fetchTests.js +++ b/test/unit/lib/fetchTests.js @@ -1,70 +1,128 @@ -const { callFetch } = require('../../../lib/fetch') +const { callFetch, withDefaults } = require('../../../lib/fetch') const { expect } = require('chai') const fs = require('fs') +const mockttp = require('mockttp') + describe('CallFetch', () => { - it('checks if the response is JSON while sending GET request', async () => { - const response = await callFetch({ - url: 'https://registry.npmjs.com/redis/0.1.0', - method: 'GET', - responseType: 'json' + describe('with mock server', () => { + const mockServer = mockttp.getLocal() + beforeEach(async () => await mockServer.start()) + afterEach(async () => await mockServer.stop()) + + it('checks if the response is JSON while sending GET request', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + const expected = fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json') + await mockServer.forGet(path).thenReply(200, expected) + + const response = await callFetch({ + url: mockServer.urlFor(path), + method: 'GET', + json: true + }) + expect(response).to.be.deep.equal(JSON.parse(expected)) }) - expect(response).to.be.deep.equal(JSON.parse(fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json'))) - }) - it('checks if the full response is fetched', async () => { - const response = await callFetch({ - url: 'https://registry.npmjs.com/redis/0.1.0', - method: 'GET', - responseType: 'json', - resolveWithFullResponse: true + it('checks if the full response is fetched', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + const expected = fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json') + await mockServer.forGet(path).thenReply(200, expected) + + const response = await callFetch({ + url: mockServer.urlFor(path), + method: 'GET', + resolveWithFullResponse: true + }) + expect(response.status).to.be.equal(200) + expect(response.statusText).to.be.equal('OK') }) - expect(response.status).to.be.equal(200) - expect(response.statusText).to.be.equal('OK') - }) - it('checks if the full response is fetched with error code', async () => { - const response = await callFetch({ - url: 'https://registry.npmjs.com/redis/0.', - method: 'GET', - responseType: 'json', - resolveWithFullResponse: true + it('checks if the full response is fetched with error code', async () => { + const path = '/registry.npmjs.com/redis/0.1.' + await mockServer.forGet(path).thenReply(404) + + const response = await callFetch({ + url: mockServer.urlFor(path), + method: 'GET', + json: 'true', + resolveWithFullResponse: true + }) + expect(response.status).to.be.equal(404) + expect(response.statusText).to.be.equal('Not Found') }) - expect(response.status).to.be.equal(404) - expect(response.statusText).to.be.equal('Not Found') - }) - it('checks if the response is text while sending GET request', async () => { - const response = await callFetch({ - url: 'https://proxy.golang.org/rsc.io/quote/@v/v1.3.0.mod', - method: 'GET', - responseType: 'text' + it('checks if the response is text while sending GET request', async () => { + const path = '/proxy.golang.org/rsc.io/quote/@v/v1.3.0.mod' + await mockServer.forGet(path).thenReply(200, 'module "rsc.io/quote"\n') + + const response = await callFetch({ + url: mockServer.urlFor(path), + method: 'GET' + }) + expect(response).to.be.equal('module "rsc.io/quote"\n') + }) + + it('should download stream successfully with GET request', async () => { + const expected = JSON.parse(fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')) + const path = '/registry.npmjs.com/redis/0.1.0' + await mockServer.forGet(path).thenStream(200, fs.createReadStream('test/fixtures/fetch/redis-0.1.0.json')) + + const response = await callFetch({ + url: mockServer.urlFor(path), + encode: null + }) + const destination = 'test/fixtures/fetch/temp.json' + await new Promise((resolve) => { + response.pipe(fs.createWriteStream(destination).on('finish', () => resolve(true))) + }) + const downloaded = JSON.parse(fs.readFileSync(destination)) + expect(downloaded).to.be.deep.equal(expected) + fs.unlinkSync(destination) }) - expect(response).to.be.equal('module "rsc.io/quote"\n') - }) - //This test case downloads a crate package - //This URL would send a JSON response if the header is not provided as a part of request. - it('should follow redirect and download the package when responseType is stream', async () => { - const response = await callFetch({ - url: 'https://crates.io/api/v1/crates/bitflags/1.0.4/download', - method: 'GET', - responseType: 'stream', - headers: { - Accept: 'text/html' - } + it('should apply default headers ', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + const url = mockServer.urlFor(path) + const endpointMock = await mockServer.forGet(path).thenReply(200) + + const defaultOptions = { headers: { 'user-agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' } } + const requestWithDefaults = withDefaults(defaultOptions) + await requestWithDefaults({ url }) + await requestWithDefaults({ url }) + + const requests = await endpointMock.getSeenRequests() + expect(requests.length).to.equal(2) + expect(requests[0].url).to.equal(url) + expect(requests[0].headers).to.include(defaultOptions.headers) + expect(requests[1].url).to.equal(url) + expect(requests[1].headers).to.include(defaultOptions.headers) }) - //Validating the length of the content inorder to verify the response is a crate package. - // JSON response would not return this header in response resulting in failing this test case. - expect(response.headers['content-length']).to.be.equal('15282') }) - it('should follow redirect and download the package when responseType is stream', async () => { - const response = await callFetch({ - url: 'https://static.crates.io/crates/bitflags/bitflags-1.0.4.crate', - method: 'GET', - responseType: 'stream' + describe('test crate download', () => { + // This test case downloads a crate package + // This URL would send a JSON response if the header is not provided as a part of request. + it('should follow redirect and download the package when responseType is stream', async () => { + const response = await callFetch({ + url: 'https://crates.io/api/v1/crates/bitflags/1.0.4/download', + method: 'GET', + encode: null, + headers: { + Accept: 'text/html' + } + }) + // Validating the length of the content in order to verify the response is a crate package. + // JSON response would not return this header in response resulting in failing this test case. + expect(response.headers['content-length']).to.be.equal('15282') + }) + + it('should download the package when responseType is stream', async () => { + const response = await callFetch({ + url: 'https://static.crates.io/crates/bitflags/bitflags-1.0.4.crate', + method: 'GET', + encode: null + }) + // Validating the length of the content inorder to verify the response is a crate package. + expect(response.headers['content-length']).to.be.equal('15282') }) - //Validating the length of the content inorder to verify the response is a crate package. - expect(response.headers['content-length']).to.be.equal('15282') }) }) From d512582438eb2dcacb8432420e4b749c88decc71 Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Sat, 6 Jul 2024 17:03:09 -0700 Subject: [PATCH 2/3] Add handling of simple --- lib/fetch.js | 16 +++++++++--- test/unit/lib/fetchTests.js | 49 ++++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/lib/fetch.js b/lib/fetch.js index 0b08836e..ee3d77eb 100644 --- a/lib/fetch.js +++ b/lib/fetch.js @@ -11,12 +11,18 @@ function buildRequestOptions(request) { responseType = 'stream' } + const validateOptions = {} + if (!request.simple) { + validateOptions.validateStatus = () => true + } + return { method: request.method, url: request.url, responseType, headers: request.headers, - data: request.body + data: request.body, + ...validateOptions } } @@ -24,10 +30,12 @@ async function callFetch(request, axiosInstance = axios) { try { const options = buildRequestOptions(request) const response = await axiosInstance(options) - if (request.resolveWithFullResponse) return response - return response.data + if (!request.resolveWithFullResponse) return response.data + response.statusCode = response.status + response.statusMessage = response.statusText + return response } catch (error) { - if (error.response && request.resolveWithFullResponse) return error.response + error.statusCode = error.response?.status throw error } } diff --git a/test/unit/lib/fetchTests.js b/test/unit/lib/fetchTests.js index 0ed110f9..c93a7940 100644 --- a/test/unit/lib/fetchTests.js +++ b/test/unit/lib/fetchTests.js @@ -32,22 +32,22 @@ describe('CallFetch', () => { method: 'GET', resolveWithFullResponse: true }) - expect(response.status).to.be.equal(200) - expect(response.statusText).to.be.equal('OK') + expect(response.statusCode).to.be.equal(200) + expect(response.statusMessage).to.be.equal('OK') }) - it('checks if the full response is fetched with error code', async () => { + it('should throw error with error code', async () => { const path = '/registry.npmjs.com/redis/0.1.' await mockServer.forGet(path).thenReply(404) - const response = await callFetch({ + await callFetch({ url: mockServer.urlFor(path), method: 'GET', json: 'true', resolveWithFullResponse: true + }).catch(err => { + expect(err.statusCode).to.be.equal(404) }) - expect(response.status).to.be.equal(404) - expect(response.statusText).to.be.equal('Not Found') }) it('checks if the response is text while sending GET request', async () => { @@ -96,6 +96,43 @@ describe('CallFetch', () => { expect(requests[1].url).to.equal(url) expect(requests[1].headers).to.include(defaultOptions.headers) }) + + describe('test simple', () => { + it('should handle 300 when simple is true by default', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + await mockServer.forGet(path).thenReply(300, 'test') + + await callFetch({ + url: mockServer.urlFor(path) + }).catch(err => { + expect(err.statusCode).to.be.equal(300) + }) + }) + + it('should handle 300 with simple === false', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + await mockServer.forGet(path).thenReply(300, 'test') + + const response = await callFetch({ + url: mockServer.urlFor(path), + simple: false + }) + expect(response).to.be.equal('test') + }) + + it('should return status 300 with simple === false', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + await mockServer.forGet(path).thenReply(300, 'test') + + const response = await callFetch({ + url: mockServer.urlFor(path), + simple: false, + resolveWithFullResponse: true + }) + expect(response.statusCode).to.be.equal(300) + expect(response.statusMessage).to.be.equal('Multiple Choices') + }) + }) }) describe('test crate download', () => { From 2fc4a8e67519784ac10c0fc17baa50389da045e9 Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Sun, 7 Jul 2024 15:03:24 -0700 Subject: [PATCH 3/3] Fix simple --- lib/fetch.js | 2 +- .../providers/fetch/mavenBasedFetchTests.js | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/lib/fetch.js b/lib/fetch.js index ee3d77eb..1ffbc1e3 100644 --- a/lib/fetch.js +++ b/lib/fetch.js @@ -12,7 +12,7 @@ function buildRequestOptions(request) { } const validateOptions = {} - if (!request.simple) { + if (request.simple === false) { validateOptions.validateStatus = () => true } diff --git a/test/unit/providers/fetch/mavenBasedFetchTests.js b/test/unit/providers/fetch/mavenBasedFetchTests.js index 5b5a826e..5b3995cc 100644 --- a/test/unit/providers/fetch/mavenBasedFetchTests.js +++ b/test/unit/providers/fetch/mavenBasedFetchTests.js @@ -1,5 +1,8 @@ const { expect } = require('chai') const MavenBasedFetch = require('../../../../providers/fetch/mavenBasedFetch') +const mockttp = require('mockttp') +const sinon = require('sinon') +const Request = require('../../../../ghcrawler').request describe('MavenBasedFetch', () => { describe('find contained file stat', () => { @@ -16,4 +19,25 @@ describe('MavenBasedFetch', () => { expect(file.mtime.toISOString().includes('2022-02-24')) }) }) + + describe('Integration test for component not found', function () { + const path = '/remotecontent?filepath=' + const mockServer = mockttp.getLocal() + beforeEach(async () => await mockServer.start()) + afterEach(async () => await mockServer.stop()) + + it('should handle maven components not found', async () => { + const handler = new MavenBasedFetch( + { + mavencentral: mockServer.urlFor(path) + }, + { logger: { log: sinon.stub() } } + ) + await mockServer.forAnyRequest().thenReply(404) + const request = await handler.handle( + new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.') + ) + expect(request.processControl).to.be.equal('skip') + }) + }) })