Skip to content

Commit

Permalink
fix: avoid regexp when checking long base64 data uris
Browse files Browse the repository at this point in the history
  • Loading branch information
devniel committed Aug 26, 2024
1 parent 48aca4f commit c0d7223
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 9 deletions.
26 changes: 26 additions & 0 deletions packages/app/src/lib/utils/getTypeAndExtension.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,29 @@ test('getTypeAndExtension', () => {
outputType: ClapOutputType.VIDEO,
})
})

/**
* Regression test for the `Maximum call stack size exceeded` error that
* occurred when a RegExp was used to parse very long base64 data URIs.
* The implementation now relies on string/array manipulation instead, which
* is much faster and resolves the stack error.
*
* The stack size error could not easily be reproduced in the vitest
* environment (it seems to happen only in the Next environment), so this
* test only checks performance.
*
* Issue: https://github.com/jbilcke-hf/clapper/issues/72
*/
test('getTypeAndExtension should be fast for long uris', () => {
// The timer starts before the fixture is built, so the measured duration
// also includes creating the 500,000,000-character payload below.
const startTime = Date.now()
const longBase64String = 'a'.repeat(500_000_000)
const dataUri = `data:image/png;base64,${longBase64String}`
const result = getTypeAndExtension(dataUri)
// Only the MIME type portion of the URI determines the expected result.
expect(result).toStrictEqual({
assetFileFormat: 'image/png',
category: 'image',
extension: 'png',
outputType: ClapOutputType.IMAGE,
})
// The timer stops after the equality assertion, so that check is timed too.
const endTime = Date.now()
const duration = endTime - startTime
// Timings reported by the author: the original regexp approach ran in
// around ~350ms, the new one in around ~70ms.
// NOTE(review): this is a wall-clock threshold and may be sensitive to the
// machine running the test; confirm it is stable in CI.
expect(duration).toBeLessThan(200)
})
16 changes: 7 additions & 9 deletions packages/app/src/lib/utils/getTypeAndExtension.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { ClapOutputType } from '@aitube/clap'

/**
* break a base64 string into sub-components
* break a base64 data uri string into sub-components
*/
export function getTypeAndExtension(base64: string = ''): {
// category eg. video, audio, text
Expand All @@ -15,17 +15,15 @@ export function getTypeAndExtension(base64: string = ''): {

outputType: ClapOutputType
} {
// Regular expression to extract the MIME type and the base64 data
const matches = base64.match(/^data:([A-Za-z-+0-9/]+);base64,(.+)$/)

if (!matches || matches.length !== 3) {
throw new Error('Invalid base64 string')
if (!base64.startsWith('data:') || !base64.includes('base64,')) {
throw new Error('Invalid base64 data uri provided.')
}

const assetFileFormat = matches[1] || ''
const base64Index = base64.indexOf('base64,')
const mimeType = base64.slice(5, base64Index - 1)

// this should be enough for most media formats (jpeg, png, webp, mp4)
const [category, extension] = assetFileFormat.split('/')
const [category, extension] = mimeType.split('/')

let outputType = ClapOutputType.TEXT

Expand All @@ -39,7 +37,7 @@ export function getTypeAndExtension(base64: string = ''): {

return {
category,
assetFileFormat,
assetFileFormat: mimeType,
extension,
outputType,
}
Expand Down

0 comments on commit c0d7223

Please sign in to comment.