Merge pull request #5888 from matrix-org/travis/voice/event_type

Expand upon voice message event & include overall waveform
This commit is contained in:
Travis Ralston 2021-04-22 14:41:59 -06:00 committed by GitHub
commit 06726d38fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 214 additions and 20 deletions

View File

@ -129,4 +129,30 @@ declare global {
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error/columnNumber
columnNumber?: number;
}
// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
interface AudioWorkletProcessor {
readonly port: MessagePort;
process(
inputs: Float32Array[][],
outputs: Float32Array[][],
parameters: Record<string, Float32Array>
): boolean;
}
// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
const AudioWorkletProcessor: {
prototype: AudioWorkletProcessor;
new (options?: AudioWorkletNodeOptions): AudioWorkletProcessor;
};
// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
function registerProcessor(
name: string,
processorCtor: (new (
options?: AudioWorkletNodeOptions
) => AudioWorkletProcessor) & {
parameterDescriptors?: AudioParamDescriptor[];
}
);
}

View File

@ -53,9 +53,38 @@ export default class VoiceRecordComposerTile extends React.PureComponent<IProps,
await this.state.recorder.stop();
const mxc = await this.state.recorder.upload();
MatrixClientPeg.get().sendMessage(this.props.room.roomId, {
body: "Voice message",
msgtype: "org.matrix.msc2516.voice",
url: mxc,
"body": "Voice message",
"msgtype": "org.matrix.msc2516.voice",
//"msgtype": MsgType.Audio,
"url": mxc,
"info": {
duration: Math.round(this.state.recorder.durationSeconds * 1000),
mimetype: this.state.recorder.contentType,
size: this.state.recorder.contentLength,
},
// MSC1767 experiment
"org.matrix.msc1767.text": "Voice message",
"org.matrix.msc1767.file": {
url: mxc,
name: "Voice message.ogg",
mimetype: this.state.recorder.contentType,
size: this.state.recorder.contentLength,
},
"org.matrix.msc1767.audio": {
duration: Math.round(this.state.recorder.durationSeconds * 1000),
// TODO: @@ TravisR: Waveform? (MSC1767 decision)
},
"org.matrix.experimental.msc2516.voice": { // MSC2516+MSC1767 experiment
duration: Math.round(this.state.recorder.durationSeconds * 1000),
// Events can't have floats, so we try to maintain resolution by using 1024
// as a maximum value. The waveform contains values between zero and 1, so this
// should come out largely sane.
//
// We're expecting about one data point per second of audio.
waveform: this.state.recorder.finalWaveform.map(v => Math.round(v * 1024)),
},
});
await VoiceRecordingStore.instance.disposeRecording();
this.setState({recorder: null});

View File

@ -54,7 +54,7 @@ export function arraySeed<T>(val: T, length: number): T[] {
* @param a The array to clone. Must be defined.
* @returns A copy of the array.
*/
export function arrayFastClone(a: any[]): any[] {
export function arrayFastClone<T>(a: T[]): T[] {
return a.slice(0, a.length);
}

View File

@ -0,0 +1,67 @@
/*
Copyright 2021 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import {IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts";
import {percentageOf} from "../utils/numbers";
// from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope
declare const currentTime: number;
// declare const currentFrame: number;
// declare const sampleRate: number;
class MxVoiceWorklet extends AudioWorkletProcessor {
private nextAmplitudeSecond = 0;
process(inputs, outputs, parameters) {
// We only fire amplitude updates once a second to avoid flooding the recording instance
// with useless data. Much of the data would end up discarded, so we ratelimit ourselves
// here.
const currentSecond = Math.round(currentTime);
if (currentSecond === this.nextAmplitudeSecond) {
// We're expecting exactly one mono input source, so just grab the very first frame of
// samples for the analysis.
const monoChan = inputs[0][0];
// The amplitude of the frame's samples is effectively the loudness of the frame. This
// translates into a bar which can be rendered as part of the whole recording clip's
// waveform.
//
// We translate the amplitude down to 0-1 for sanity's sake.
const minVal = Math.min(...monoChan);
const maxVal = Math.max(...monoChan);
const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1);
this.port.postMessage(<IAmplitudePayload>{
ev: PayloadEvent.AmplitudeMark,
amplitude: amplitude,
forSecond: currentSecond,
});
this.nextAmplitudeSecond++;
}
// We mostly use this worklet to fire regular clock updates through to components
this.port.postMessage(<ITimingPayload>{ev: PayloadEvent.Timekeep, timeSeconds: currentTime});
// We're supposed to return false when we're "done" with the audio clip, but seeing as
// we are acting as a passive processor we are never truly "done". The browser will clean
// us up when it is done with us.
return true;
}
}
registerProcessor(WORKLET_NAME, MxVoiceWorklet);
export default null; // to appease module loaders (we never use the export)

View File

@ -23,6 +23,8 @@ import {clamp} from "../utils/numbers";
import EventEmitter from "events";
import {IDestroyable} from "../utils/IDestroyable";
import {Singleflight} from "../utils/Singleflight";
import {PayloadEvent, WORKLET_NAME} from "./consts";
import {arrayFastClone} from "../utils/arrays";
const CHANNELS = 1; // stereo isn't important
const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
@ -49,16 +51,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
private recorderSource: MediaStreamAudioSourceNode;
private recorderStream: MediaStream;
private recorderFFT: AnalyserNode;
private recorderProcessor: ScriptProcessorNode;
private recorderWorklet: AudioWorkletNode;
private buffer = new Uint8Array(0);
private mxc: string;
private recording = false;
private observable: SimpleObservable<IRecordingUpdate>;
private amplitudes: number[] = []; // at each second mark, generated
public constructor(private client: MatrixClient) {
super();
}
public get finalWaveform(): number[] {
return arrayFastClone(this.amplitudes);
}
public get contentType(): string {
return "audio/ogg";
}
public get contentLength(): number {
return this.buffer.length;
}
public get durationSeconds(): number {
if (!this.recorder) throw new Error("Duration not available without a recording");
return this.recorderContext.currentTime;
}
private async makeRecorder() {
this.recorderStream = await navigator.mediaDevices.getUserMedia({
audio: {
@ -80,18 +100,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
// it makes the time domain less than helpful.
this.recorderFFT.fftSize = 64;
// We use an audio processor to get accurate timing information.
// The size of the audio buffer largely decides how quickly we push timing/waveform data
// out of this class. Smaller buffers mean we update more frequently as we can't hold as
// many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of
// updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime
// as possible. Must be a power of 2.
this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS);
// Set up our worklet. We use this for timing information and waveform analysis: the
// web audio API prefers this be done async to avoid holding the main thread with math.
const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript;
if (!mxRecorderWorkletPath) {
throw new Error("Unable to create recorder: no worklet script registered");
}
await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
// Connect our inputs and outputs
this.recorderSource.connect(this.recorderFFT);
this.recorderSource.connect(this.recorderProcessor);
this.recorderProcessor.connect(this.recorderContext.destination);
this.recorderSource.connect(this.recorderWorklet);
this.recorderWorklet.connect(this.recorderContext.destination);
// Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
this.recorderWorklet.port.onmessage = (ev) => {
switch (ev.data['ev']) {
case PayloadEvent.Timekeep:
this.processAudioUpdate(ev.data['timeSeconds']);
break;
case PayloadEvent.AmplitudeMark:
// Sanity check to make sure we're adding about one sample per second
if (ev.data['forSecond'] === this.amplitudes.length) {
this.amplitudes.push(ev.data['amplitude']);
}
break;
}
};
this.recorder = new Recorder({
encoderPath, // magic from webpack
@ -138,7 +174,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
return this.mxc;
}
private processAudioUpdate = (ev: AudioProcessingEvent) => {
private processAudioUpdate = (timeSeconds: number) => {
if (!this.recording) return;
// The time domain is the input to the FFT, which means we use an array of the same
@ -162,12 +198,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
this.observable.update({
waveform: translatedData,
timeSeconds: ev.playbackTime,
timeSeconds: timeSeconds,
});
// Now that we've updated the data/waveform, let's do a time check. We don't want to
// go horribly over the limit. We also emit a warning state if needed.
const secondsLeft = TARGET_MAX_LENGTH - ev.playbackTime;
const secondsLeft = TARGET_MAX_LENGTH - timeSeconds;
if (secondsLeft <= 0) {
// noinspection JSIgnoredPromiseFromCall - we aren't concerned with it overlapping
this.stop();
@ -191,7 +227,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
}
this.observable = new SimpleObservable<IRecordingUpdate>();
await this.makeRecorder();
this.recorderProcessor.addEventListener("audioprocess", this.processAudioUpdate);
await this.recorder.start();
this.recording = true;
this.emit(RecordingState.Started);
@ -205,6 +240,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
// Disconnect the source early to start shutting down resources
this.recorderSource.disconnect();
this.recorderWorklet.disconnect();
await this.recorder.stop();
// close the context after the recorder so the recorder doesn't try to
@ -216,7 +252,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
// Finally do our post-processing and clean up
this.recording = false;
this.recorderProcessor.removeEventListener("audioprocess", this.processAudioUpdate);
await this.recorder.close();
this.emit(RecordingState.Ended);
@ -240,7 +275,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
this.emit(RecordingState.Uploading);
this.mxc = await this.client.uploadContent(new Blob([this.buffer], {
type: "audio/ogg",
type: this.contentType,
}), {
onlyContentUri: false, // to stop the warnings in the console
}).then(r => r['content_uri']);

37
src/voice/consts.ts Normal file
View File

@ -0,0 +1,37 @@
/*
Copyright 2021 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
export const WORKLET_NAME = "mx-voice-worklet";
export enum PayloadEvent {
Timekeep = "timekeep",
AmplitudeMark = "amplitude_mark",
}
export interface IPayload {
ev: PayloadEvent;
}
export interface ITimingPayload extends IPayload {
ev: PayloadEvent.Timekeep;
timeSeconds: number;
}
export interface IAmplitudePayload extends IPayload {
ev: PayloadEvent.AmplitudeMark;
forSecond: number;
amplitude: number;
}