Skip to content

Commit

Permalink
wav
Browse files Browse the repository at this point in the history
  • Loading branch information
Dooy committed Oct 9, 2024
1 parent 411c807 commit c46473d
Show file tree
Hide file tree
Showing 12 changed files with 1,657 additions and 0 deletions.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
"vue-waterfall-plugin-next": "^2.3.1"
},
"devDependencies": {
"@openai/realtime-api-beta": "github:dooy/openai-realtime-api-beta",
"@openai/realtime-wavtools": "github:dooy/openai-realtime-wavtools",
"@antfu/eslint-config": "^0.35.3",
"@commitlint/cli": "^17.4.4",
"@commitlint/config-conventional": "^17.4.4",
Expand Down
41 changes: 41 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions src/lib/wavtools/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Barrel module: re-exports the wavtools public API from its lib/ internals.
import { WavPacker } from './lib/wav_packer.js';
import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
import { WavStreamPlayer } from './lib/wav_stream_player.js';
import { WavRecorder } from './lib/wav_recorder.js';

export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };
203 changes: 203 additions & 0 deletions src/lib/wavtools/lib/analysis/audio_analysis.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
import {
noteFrequencies,
noteFrequencyLabels,
voiceFrequencies,
voiceFrequencyLabels,
} from './constants.js';

/**
* Output of AudioAnalysis for the frequency domain of the audio
* @typedef {Object} AudioAnalysisOutputType
* @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
* @property {number[]} frequencies Raw frequency bucket values
* @property {string[]} labels Labels for the frequency bucket values
*/

/**
* Analyzes audio for visual output
* @class
*/
export class AudioAnalysis {
  /**
   * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
   * returns human-readable formatting and labels
   * @param {AnalyserNode} analyser
   * @param {number} sampleRate
   * @param {Float32Array} [fftResult] pre-captured FFT frame; read live from the analyser when omitted
   * @param {"frequency"|"music"|"voice"} [analysisType]
   * @param {number} [minDecibels] default -100
   * @param {number} [maxDecibels] default -30
   * @returns {AudioAnalysisOutputType}
   */
  static getFrequencies(
    analyser,
    sampleRate,
    fftResult,
    analysisType = 'frequency',
    minDecibels = -100,
    maxDecibels = -30,
  ) {
    if (!fftResult) {
      // No cached frame supplied: sample the analyser's current output.
      fftResult = new Float32Array(analyser.frequencyBinCount);
      analyser.getFloatFrequencyData(fftResult);
    }
    // FFT bins span 0..Nyquist (half the sample rate) in equal-width steps.
    const nyquistFrequency = sampleRate / 2;
    const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
    let outputValues;
    let frequencies;
    let labels;
    if (analysisType === 'music' || analysisType === 'voice') {
      const useFrequencies =
        analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
      // One slot per named note; seed with the decibel floor so untouched
      // notes normalize to 0 below.
      const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
      for (let i = 0; i < fftResult.length; i++) {
        const frequency = i * frequencyStep;
        const amplitude = fftResult[i];
        // Walk the note table from the top down; the first note below this
        // bin's frequency "owns" the bin, keeping the loudest amplitude seen.
        for (let n = useFrequencies.length - 1; n >= 0; n--) {
          if (frequency > useFrequencies[n]) {
            aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
            break;
          }
        }
      }
      outputValues = aggregateOutput;
      frequencies =
        analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
      labels =
        analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
    } else {
      // Raw "frequency" mode: report every FFT bin with a "<Hz> Hz" label.
      outputValues = Array.from(fftResult);
      frequencies = outputValues.map((_, i) => frequencyStep * i);
      labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
    }
    // We normalize to {0, 1}: clamp decibels into [minDecibels, maxDecibels]
    // and rescale linearly.
    const normalizedOutput = outputValues.map((v) => {
      return Math.max(
        0,
        Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1),
      );
    });
    const values = new Float32Array(normalizedOutput);
    return {
      values,
      frequencies,
      labels,
    };
  }

  /**
   * Creates a new AudioAnalysis instance for an HTMLAudioElement
   * @param {HTMLAudioElement} audioElement
   * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
   * @returns {AudioAnalysis}
   */
  constructor(audioElement, audioBuffer = null) {
    // Cached FFT frames, filled only on the offline (audioBuffer) path.
    this.fftResults = [];
    if (audioBuffer) {
      /**
       * Modified from
       * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing
       *
       * We do this to populate FFT values for the audio if provided an `audioBuffer`
       * The reason to do this is that Safari fails when using `createMediaElementSource`
       * This has a non-zero RAM cost so we only opt-in to run it on Safari, Chrome is better
       */
      const { length, sampleRate } = audioBuffer;
      const offlineAudioContext = new OfflineAudioContext({
        length,
        sampleRate,
      });
      const source = offlineAudioContext.createBufferSource();
      source.buffer = audioBuffer;
      const analyser = offlineAudioContext.createAnalyser();
      analyser.fftSize = 8192;
      analyser.smoothingTimeConstant = 0.1;
      source.connect(analyser);
      // limit is :: 128 / sampleRate;
      // but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM
      const renderQuantumInSeconds = 1 / 60;
      const durationInSeconds = length / sampleRate;
      // Recursively schedule a suspend point every render quantum, capture one
      // FFT frame at each, then resume until the whole buffer is analyzed.
      // NOTE(review): the startRendering() promise is not awaited, so frames
      // populate asynchronously after construction returns.
      const analyze = (index) => {
        const suspendTime = renderQuantumInSeconds * index;
        if (suspendTime < durationInSeconds) {
          // suspend() must be scheduled before rendering reaches suspendTime.
          offlineAudioContext.suspend(suspendTime).then(() => {
            const fftResult = new Float32Array(analyser.frequencyBinCount);
            analyser.getFloatFrequencyData(fftResult);
            this.fftResults.push(fftResult);
            analyze(index + 1);
          });
        }
        // First call kicks off rendering; every later call resumes past the
        // suspend point that was just serviced.
        if (index === 1) {
          offlineAudioContext.startRendering();
        } else {
          offlineAudioContext.resume();
        }
      };
      source.start(0);
      analyze(1);
      this.audio = audioElement;
      this.context = offlineAudioContext;
      this.analyser = analyser;
      this.sampleRate = sampleRate;
      this.audioBuffer = audioBuffer;
    } else {
      // Live path: tap the audio element through a MediaElementSource and
      // keep it audible by routing the analyser on to the destination.
      const audioContext = new AudioContext();
      const track = audioContext.createMediaElementSource(audioElement);
      const analyser = audioContext.createAnalyser();
      analyser.fftSize = 8192;
      analyser.smoothingTimeConstant = 0.1;
      track.connect(analyser);
      analyser.connect(audioContext.destination);
      this.audio = audioElement;
      this.context = audioContext;
      this.analyser = analyser;
      this.sampleRate = this.context.sampleRate;
      this.audioBuffer = null;
    }
  }

  /**
   * Gets the current frequency domain data from the playing audio track
   * @param {"frequency"|"music"|"voice"} [analysisType]
   * @param {number} [minDecibels] default -100
   * @param {number} [maxDecibels] default -30
   * @returns {AudioAnalysisOutputType}
   */
  getFrequencies(
    analysisType = 'frequency',
    minDecibels = -100,
    maxDecibels = -30,
  ) {
    let fftResult = null;
    if (this.audioBuffer && this.fftResults.length) {
      // Offline path: pick the cached frame nearest the playback position.
      const pct = this.audio.currentTime / this.audio.duration;
      const index = Math.min(
        (pct * this.fftResults.length) | 0, // |0 truncates to an integer index
        this.fftResults.length - 1,
      );
      fftResult = this.fftResults[index];
    }
    return AudioAnalysis.getFrequencies(
      this.analyser,
      this.sampleRate,
      fftResult,
      analysisType,
      minDecibels,
      maxDecibels,
    );
  }

  /**
   * Resume the internal AudioContext if it was suspended due to the lack of
   * user interaction when the AudioAnalysis was instantiated.
   * @returns {Promise<true>}
   */
  async resumeIfSuspended() {
    if (this.context.state === 'suspended') {
      await this.context.resume();
    }
    return true;
  }
}

// Also expose on the global scope so non-module consumers can reach the class.
globalThis.AudioAnalysis = AudioAnalysis;
60 changes: 60 additions & 0 deletions src/lib/wavtools/lib/analysis/constants.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/**
* Constants for help with visualization
* Helps map frequency ranges from Fast Fourier Transform
* to human-interpretable ranges, notably music ranges and
* human vocal ranges.
*/

// Reference pitches for the eighth octave (C8 through B8), in Hz.
const octave8Hz = [
  4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
  6644.88, 7040.0, 7458.62, 7902.13,
];

// Note names matching octave8Hz index-for-index.
const noteNames = [
  'C',
  'C#',
  'D',
  'D#',
  'E',
  'F',
  'F#',
  'G',
  'G#',
  'A',
  'A#',
  'B',
];

/**
 * All note frequencies from 1st to 8th octave
 * in format "A#8" (A#, 8th octave)
 */
export const noteFrequencies = [];
export const noteFrequencyLabels = [];
for (let octave = 1; octave <= 8; octave++) {
  noteNames.forEach((name, idx) => {
    // Each octave down halves the frequency, so divide the octave-8 pitch.
    noteFrequencies.push(octave8Hz[idx] / 2 ** (8 - octave));
    noteFrequencyLabels.push(`${name}${octave}`);
  });
}

/**
 * Subset of the note frequencies between 32 and 2000 Hz
 * 6 octave range: C1 to B6
 */
const voiceLowHz = 32.0;
const voiceHighHz = 2000.0;
// Shared predicate so frequency and label subsets stay index-aligned.
const inVoiceRange = (i) =>
  noteFrequencies[i] > voiceLowHz && noteFrequencies[i] < voiceHighHz;
export const voiceFrequencies = noteFrequencies.filter((_, i) =>
  inVoiceRange(i),
);
export const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) =>
  inVoiceRange(i),
);
Loading

0 comments on commit c46473d

Please sign in to comment.