Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade Azure Storage SDK to a modern version #629

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
12 changes: 9 additions & 3 deletions config/cdConfig.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ const config = require('painless-config')

const cd_azblob = {
connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'),
container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME')
container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'),
account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'),
spnAuth: config.get('CRAWLER_AZBLOB_SPN_AUTH')
}

const githubToken = config.get('CRAWLER_GITHUB_TOKEN')
Expand Down Expand Up @@ -111,7 +113,9 @@ module.exports = {
},
azqueue: {
connectionString: cd_azblob.connection,
queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests'
account: cd_azblob.account,
queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests',
spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH')
},
'cd(azblob)': cd_azblob,
'cd(file)': cd_file
Expand All @@ -135,7 +139,9 @@ module.exports = {
maxDequeueCount: 5,
attenuation: {
ttl: 3000
}
},
spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH'),
account: cd_azblob.account
},
appVersion: config.get('APP_VERSION'),
buildsha: config.get('BUILD_SHA')
Expand Down
165 changes: 73 additions & 92 deletions ghcrawler/providers/queuing/storageQueue.js
Original file line number Diff line number Diff line change
@@ -1,50 +1,46 @@
// Copyright (c) Microsoft Corporation and others. Made available under the MIT license.
// SPDX-License-Identifier: MIT

// eslint-disable-next-line no-unused-vars
const { QueueServiceClient } = require('@azure/storage-queue')
const qlimit = require('qlimit')
const { cloneDeep } = require('lodash')

class StorageQueue {
/**
* @param {QueueServiceClient} client
* @param {string} name
* @param {string} queueName
* @param {object} formatter
* @param {object} options
*/
constructor(client, name, queueName, formatter, options) {
this.client = client
this.name = name
this.queueName = queueName
this.messageFormatter = formatter
this.options = options
this.logger = options.logger
this.queueClient = client.getQueueClient(this.queueName)
}

async subscribe() {
return new Promise((resolve, reject) => {
this.client.createQueueIfNotExists(this.queueName, error => {
if (error) {
return reject(error)
}
this.logger.info(`Subscribed to ${this.queueName} using Queue Storage`)
resolve()
})
})
await this.queueClient.createIfNotExists()
this.logger.info(`Subscribed to ${this.queueName} using Queue Storage`)
}

async unsubscribe() {
return
// No specific unsubscribe logic for Azure Queue Storage
}

async push(requests, option) {
async push(requests) {
requests = Array.isArray(requests) ? requests : [requests]
return Promise.all(
requests.map(
qlimit(this.options.parallelPush || 1)(request => {
qlimit(this.options.parallelPush || 1)(async request => {
const body = JSON.stringify(request)
return new Promise((resolve, reject) => {
this.client.createMessage(this.queueName, body, option, (error, queueMessageResult) => {
if (error) {
return reject(error)
}
this._log('Queued', request)
resolve(this._buildMessageReceipt(queueMessageResult, request))
})
})
const queueMessageResult = await this.queueClient.sendMessage(body)
this._log('Queued', request)
return this._buildMessageReceipt(queueMessageResult, request)
})
)
)
Expand All @@ -56,47 +52,50 @@ class StorageQueue {
}

async pop() {
const msgOptions = { numOfMessages: 1, visibilityTimeout: this.options.visibilityTimeout || 60 * 60 }
return new Promise((resolve, reject) => {
this.client.getMessages(this.queueName, msgOptions, (error, result) => {
if (error) {
return reject(error)
}
const message = result[0]
if (!message) {
this.logger.verbose('No messages to receive')
return resolve(null)
}
if (this.options.maxDequeueCount && message.dequeueCount > this.options.maxDequeueCount) {
this.logger.verbose('maxDequeueCount exceeded')
this.client.deleteMessage(this.queueName, message.messageId, message.popReceipt, error => {
if (error) return reject(error)
resolve(null)
})
} else {
message.body = JSON.parse(message.messageText)
const request = this.messageFormatter(message)
request._message = message
this._log('Popped', message.body)
resolve(request)
}
})
})
const msgOptions = { numberOfMessages: 1, visibilityTimeout: this.options.visibilityTimeout || 60 * 60 }
const response = await this.queueClient.receiveMessages(msgOptions)
const message = response.receivedMessageItems[0]
if (!message) {
this.logger.verbose('No messages to receive')
return null
}
if (this.options.maxDequeueCount && message.dequeueCount > this.options.maxDequeueCount) {
this.logger.verbose('maxDequeueCount exceeded')
try {
await this.queueClient.deleteMessage(message.messageId, message.popReceipt)
} catch (error) {
// Ignore error
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should the error be logged here? In the previous logic, the promise is rejected on line 73 prior to change.

}
return null
} else {
try {
const decodedText = message.messageText
.replace(/"/g, '"')
.replace(/&/g, '&')
.replace(/'/g, "'")
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
message.body = JSON.parse(decodedText)
} catch (error) {
this.logger.error(`Failed to parse message ${message.messageId}:`)
this.logger.error(`Raw message: ${message.messageText}`)
this.logger.error(`Parse error: ${error.message}`)
await this.queueClient.deleteMessage(message.messageId, message.popReceipt)
return null
}
const request = this.messageFormatter(message)
request._message = message
this._log('Popped', message.body)
return request
}
}

async done(request) {
if (!request || !request._message) {
return
}
return new Promise((resolve, reject) => {
this.client.deleteMessage(this.queueName, request._message.messageId, request._message.popReceipt, error => {
if (error) {
return reject(error)
}
this._log('ACKed', request._message.body)
resolve()
})
})
await this.queueClient.deleteMessage(request._message.messageId, request._message.popReceipt)
this._log('ACKed', request._message.body)
}

async defer(request) {
Expand All @@ -110,47 +109,29 @@ class StorageQueue {
await this.updateVisibilityTimeout(request)
}

updateVisibilityTimeout(request, visibilityTimeout = 0) {
return new Promise((resolve, reject) => {
// visibilityTimeout is updated to 0 to unlock/unlease the message
this.client.updateMessage(
this.queueName,
request._message.messageId,
request._message.popReceipt,
visibilityTimeout,
(error, result) => {
if (error) {
return reject(error)
}
this._log('NAKed', request._message.body)
resolve(this._buildMessageReceipt(result, request._message.body))
}
)
})
async updateVisibilityTimeout(request, visibilityTimeout = 0) {
await this.queueClient.updateMessage(
request._message.messageId,
request._message.popReceipt,
undefined,
visibilityTimeout
)
this._log('NAKed', request._message.body)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the message receipt be returned here, same as on 126 prior to change?

}

async flush() {
return new Promise((resolve, reject) => {
this.client.deleteQueue(this.queueName, error => {
if (error) return reject(error)
this.client.createQueueIfNotExists(this.queueName, error => {
if (error) return reject(error)
resolve()
})
})
})
await this.queueClient.clearMessages()
this.logger.info(`Flushed all messages from ${this.queueName}`)
}

async getInfo() {
return new Promise(resolve => {
this.client.getQueueMetadata(this.queueName, (result, error) => {
if (error) {
this.logger.error(error)
resolve(null)
}
resolve({ count: result[0].approximateMessageCount })
})
})
try {
const properties = await this.queueClient.getProperties()
return { count: properties.approximateMessagesCount }
} catch (error) {
this.logger.error(error)
return null
}
}

getName() {
Expand Down
29 changes: 25 additions & 4 deletions ghcrawler/providers/queuing/storageQueueManager.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,35 @@
// SPDX-License-Identifier: MIT

const AttenuatedQueue = require('./attenuatedQueue')
const AzureStorage = require('azure-storage')
const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue')
const Request = require('../../lib/request')
const StorageQueue = require('./storageQueue')
const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/identity')

class StorageQueueManager {
constructor(connectionString) {
const retryOperations = new AzureStorage.ExponentialRetryPolicyFilter()
this.client = AzureStorage.createQueueService(connectionString).withFilter(retryOperations)
constructor(connectionString, options) {
const pipelineOptions = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the use of pipelineOptions. Makes it easier to see the configs related to the queues.

retryOptions: {
maxTries: 3,
retryDelayInMs: 1000,
maxRetryDelayInMs: 120 * 1000,
tryTimeoutInMs: 30000,
retryPolicyType: StorageRetryPolicyType.EXPONENTIAL
}
}
if (connectionString) {
this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions)
} else {
const { account, spnAuth } = options
let credential
if (spnAuth) {
const authParsed = JSON.parse(spnAuth)
credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret)
} else {
credential = new DefaultAzureCredential()
}
this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions)
}
}

createQueueClient(name, formatter, options) {
Expand Down
50 changes: 43 additions & 7 deletions ghcrawler/providers/storage/azureBlobFactory.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,51 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-License-Identifier: MIT

const AzureStorage = require('azure-storage')
// @ts-check
const { BlobServiceClient, StorageRetryPolicyType } = require('@azure/storage-blob')
const AzureStorageDocStore = require('./storageDocStore')
const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/identity')

/**
* @param {object} options
* @param {string} options.account
* @param {string} options.connection
* @param {string} options.container
* @param {object} options.logger
* @param {object} options.spnAuth
*/
module.exports = options => {
options.logger.info('creating azure storage store')
const { account, key, connection, container } = options
const retryOperations = new AzureStorage.ExponentialRetryPolicyFilter()
const blobService = connection
? AzureStorage.createBlobService(connection).withFilter(retryOperations)
: AzureStorage.createBlobService(account, key).withFilter(retryOperations)
return new AzureStorageDocStore(blobService, container, options)
const { account, connection, container, spnAuth } = options

const pipelineOptions = {
retryOptions: {
maxTries: 3,
retryDelayInMs: 1000,
maxRetryDelayInMs: 120 * 1000,
tryTimeoutInMs: 30000,
retryPolicyType: StorageRetryPolicyType.EXPONENTIAL
}
}

let blobServiceClient
if (connection) {
options.logger.info('using connection string')
blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions)
} else {
let credential
if (spnAuth) {
const authParsed = JSON.parse(spnAuth)
credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret)
options.logger.info('using service principal credentials')
} else {
credential = new DefaultAzureCredential()
options.logger.info('using default credentials')
}
blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, credential, pipelineOptions)
}

const containerClient = blobServiceClient.getContainerClient(container)

return new AzureStorageDocStore(containerClient, options)
}
Loading
Loading