mirror of
https://github.com/vector-im/element-web.git
synced 2024-11-16 05:04:57 +08:00
Merge pull request #5888 from matrix-org/travis/voice/event_type
Expand upon voice message event & include overall waveform
This commit is contained in:
commit
06726d38fe
26
src/@types/global.d.ts
vendored
26
src/@types/global.d.ts
vendored
@ -129,4 +129,30 @@ declare global {
|
||||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error/columnNumber
|
||||
columnNumber?: number;
|
||||
}
|
||||
|
||||
// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
|
||||
interface AudioWorkletProcessor {
|
||||
readonly port: MessagePort;
|
||||
process(
|
||||
inputs: Float32Array[][],
|
||||
outputs: Float32Array[][],
|
||||
parameters: Record<string, Float32Array>
|
||||
): boolean;
|
||||
}
|
||||
|
||||
// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
|
||||
const AudioWorkletProcessor: {
|
||||
prototype: AudioWorkletProcessor;
|
||||
new (options?: AudioWorkletNodeOptions): AudioWorkletProcessor;
|
||||
};
|
||||
|
||||
// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
|
||||
function registerProcessor(
|
||||
name: string,
|
||||
processorCtor: (new (
|
||||
options?: AudioWorkletNodeOptions
|
||||
) => AudioWorkletProcessor) & {
|
||||
parameterDescriptors?: AudioParamDescriptor[];
|
||||
}
|
||||
);
|
||||
}
|
||||
|
@ -53,9 +53,38 @@ export default class VoiceRecordComposerTile extends React.PureComponent<IProps,
|
||||
await this.state.recorder.stop();
|
||||
const mxc = await this.state.recorder.upload();
|
||||
MatrixClientPeg.get().sendMessage(this.props.room.roomId, {
|
||||
body: "Voice message",
|
||||
msgtype: "org.matrix.msc2516.voice",
|
||||
url: mxc,
|
||||
"body": "Voice message",
|
||||
"msgtype": "org.matrix.msc2516.voice",
|
||||
//"msgtype": MsgType.Audio,
|
||||
"url": mxc,
|
||||
"info": {
|
||||
duration: Math.round(this.state.recorder.durationSeconds * 1000),
|
||||
mimetype: this.state.recorder.contentType,
|
||||
size: this.state.recorder.contentLength,
|
||||
},
|
||||
|
||||
// MSC1767 experiment
|
||||
"org.matrix.msc1767.text": "Voice message",
|
||||
"org.matrix.msc1767.file": {
|
||||
url: mxc,
|
||||
name: "Voice message.ogg",
|
||||
mimetype: this.state.recorder.contentType,
|
||||
size: this.state.recorder.contentLength,
|
||||
},
|
||||
"org.matrix.msc1767.audio": {
|
||||
duration: Math.round(this.state.recorder.durationSeconds * 1000),
|
||||
// TODO: @@ TravisR: Waveform? (MSC1767 decision)
|
||||
},
|
||||
"org.matrix.experimental.msc2516.voice": { // MSC2516+MSC1767 experiment
|
||||
duration: Math.round(this.state.recorder.durationSeconds * 1000),
|
||||
|
||||
// Events can't have floats, so we try to maintain resolution by using 1024
|
||||
// as a maximum value. The waveform contains values between zero and 1, so this
|
||||
// should come out largely sane.
|
||||
//
|
||||
// We're expecting about one data point per second of audio.
|
||||
waveform: this.state.recorder.finalWaveform.map(v => Math.round(v * 1024)),
|
||||
},
|
||||
});
|
||||
await VoiceRecordingStore.instance.disposeRecording();
|
||||
this.setState({recorder: null});
|
||||
|
@ -54,7 +54,7 @@ export function arraySeed<T>(val: T, length: number): T[] {
|
||||
* @param a The array to clone. Must be defined.
|
||||
* @returns A copy of the array.
|
||||
*/
|
||||
export function arrayFastClone(a: any[]): any[] {
|
||||
export function arrayFastClone<T>(a: T[]): T[] {
|
||||
return a.slice(0, a.length);
|
||||
}
|
||||
|
||||
|
67
src/voice/RecorderWorklet.ts
Normal file
67
src/voice/RecorderWorklet.ts
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
import {IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts";
|
||||
import {percentageOf} from "../utils/numbers";
|
||||
|
||||
// from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope
|
||||
declare const currentTime: number;
|
||||
// declare const currentFrame: number;
|
||||
// declare const sampleRate: number;
|
||||
|
||||
class MxVoiceWorklet extends AudioWorkletProcessor {
|
||||
private nextAmplitudeSecond = 0;
|
||||
|
||||
process(inputs, outputs, parameters) {
|
||||
// We only fire amplitude updates once a second to avoid flooding the recording instance
|
||||
// with useless data. Much of the data would end up discarded, so we ratelimit ourselves
|
||||
// here.
|
||||
const currentSecond = Math.round(currentTime);
|
||||
if (currentSecond === this.nextAmplitudeSecond) {
|
||||
// We're expecting exactly one mono input source, so just grab the very first frame of
|
||||
// samples for the analysis.
|
||||
const monoChan = inputs[0][0];
|
||||
|
||||
// The amplitude of the frame's samples is effectively the loudness of the frame. This
|
||||
// translates into a bar which can be rendered as part of the whole recording clip's
|
||||
// waveform.
|
||||
//
|
||||
// We translate the amplitude down to 0-1 for sanity's sake.
|
||||
const minVal = Math.min(...monoChan);
|
||||
const maxVal = Math.max(...monoChan);
|
||||
const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1);
|
||||
|
||||
this.port.postMessage(<IAmplitudePayload>{
|
||||
ev: PayloadEvent.AmplitudeMark,
|
||||
amplitude: amplitude,
|
||||
forSecond: currentSecond,
|
||||
});
|
||||
this.nextAmplitudeSecond++;
|
||||
}
|
||||
|
||||
// We mostly use this worklet to fire regular clock updates through to components
|
||||
this.port.postMessage(<ITimingPayload>{ev: PayloadEvent.Timekeep, timeSeconds: currentTime});
|
||||
|
||||
// We're supposed to return false when we're "done" with the audio clip, but seeing as
|
||||
// we are acting as a passive processor we are never truly "done". The browser will clean
|
||||
// us up when it is done with us.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
registerProcessor(WORKLET_NAME, MxVoiceWorklet);
|
||||
|
||||
export default null; // to appease module loaders (we never use the export)
|
@ -23,6 +23,8 @@ import {clamp} from "../utils/numbers";
|
||||
import EventEmitter from "events";
|
||||
import {IDestroyable} from "../utils/IDestroyable";
|
||||
import {Singleflight} from "../utils/Singleflight";
|
||||
import {PayloadEvent, WORKLET_NAME} from "./consts";
|
||||
import {arrayFastClone} from "../utils/arrays";
|
||||
|
||||
const CHANNELS = 1; // stereo isn't important
|
||||
const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
|
||||
@ -49,16 +51,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
||||
private recorderSource: MediaStreamAudioSourceNode;
|
||||
private recorderStream: MediaStream;
|
||||
private recorderFFT: AnalyserNode;
|
||||
private recorderProcessor: ScriptProcessorNode;
|
||||
private recorderWorklet: AudioWorkletNode;
|
||||
private buffer = new Uint8Array(0);
|
||||
private mxc: string;
|
||||
private recording = false;
|
||||
private observable: SimpleObservable<IRecordingUpdate>;
|
||||
private amplitudes: number[] = []; // at each second mark, generated
|
||||
|
||||
public constructor(private client: MatrixClient) {
|
||||
super();
|
||||
}
|
||||
|
||||
public get finalWaveform(): number[] {
|
||||
return arrayFastClone(this.amplitudes);
|
||||
}
|
||||
|
||||
public get contentType(): string {
|
||||
return "audio/ogg";
|
||||
}
|
||||
|
||||
public get contentLength(): number {
|
||||
return this.buffer.length;
|
||||
}
|
||||
|
||||
public get durationSeconds(): number {
|
||||
if (!this.recorder) throw new Error("Duration not available without a recording");
|
||||
return this.recorderContext.currentTime;
|
||||
}
|
||||
|
||||
private async makeRecorder() {
|
||||
this.recorderStream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
@ -80,18 +100,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
||||
// it makes the time domain less than helpful.
|
||||
this.recorderFFT.fftSize = 64;
|
||||
|
||||
// We use an audio processor to get accurate timing information.
|
||||
// The size of the audio buffer largely decides how quickly we push timing/waveform data
|
||||
// out of this class. Smaller buffers mean we update more frequently as we can't hold as
|
||||
// many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of
|
||||
// updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime
|
||||
// as possible. Must be a power of 2.
|
||||
this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS);
|
||||
// Set up our worklet. We use this for timing information and waveform analysis: the
|
||||
// web audio API prefers this be done async to avoid holding the main thread with math.
|
||||
const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript;
|
||||
if (!mxRecorderWorkletPath) {
|
||||
throw new Error("Unable to create recorder: no worklet script registered");
|
||||
}
|
||||
await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
|
||||
this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
|
||||
|
||||
// Connect our inputs and outputs
|
||||
this.recorderSource.connect(this.recorderFFT);
|
||||
this.recorderSource.connect(this.recorderProcessor);
|
||||
this.recorderProcessor.connect(this.recorderContext.destination);
|
||||
this.recorderSource.connect(this.recorderWorklet);
|
||||
this.recorderWorklet.connect(this.recorderContext.destination);
|
||||
|
||||
// Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
|
||||
this.recorderWorklet.port.onmessage = (ev) => {
|
||||
switch (ev.data['ev']) {
|
||||
case PayloadEvent.Timekeep:
|
||||
this.processAudioUpdate(ev.data['timeSeconds']);
|
||||
break;
|
||||
case PayloadEvent.AmplitudeMark:
|
||||
// Sanity check to make sure we're adding about one sample per second
|
||||
if (ev.data['forSecond'] === this.amplitudes.length) {
|
||||
this.amplitudes.push(ev.data['amplitude']);
|
||||
}
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
this.recorder = new Recorder({
|
||||
encoderPath, // magic from webpack
|
||||
@ -138,7 +174,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
||||
return this.mxc;
|
||||
}
|
||||
|
||||
private processAudioUpdate = (ev: AudioProcessingEvent) => {
|
||||
private processAudioUpdate = (timeSeconds: number) => {
|
||||
if (!this.recording) return;
|
||||
|
||||
// The time domain is the input to the FFT, which means we use an array of the same
|
||||
@ -162,12 +198,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
||||
|
||||
this.observable.update({
|
||||
waveform: translatedData,
|
||||
timeSeconds: ev.playbackTime,
|
||||
timeSeconds: timeSeconds,
|
||||
});
|
||||
|
||||
// Now that we've updated the data/waveform, let's do a time check. We don't want to
|
||||
// go horribly over the limit. We also emit a warning state if needed.
|
||||
const secondsLeft = TARGET_MAX_LENGTH - ev.playbackTime;
|
||||
const secondsLeft = TARGET_MAX_LENGTH - timeSeconds;
|
||||
if (secondsLeft <= 0) {
|
||||
// noinspection JSIgnoredPromiseFromCall - we aren't concerned with it overlapping
|
||||
this.stop();
|
||||
@ -191,7 +227,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
||||
}
|
||||
this.observable = new SimpleObservable<IRecordingUpdate>();
|
||||
await this.makeRecorder();
|
||||
this.recorderProcessor.addEventListener("audioprocess", this.processAudioUpdate);
|
||||
await this.recorder.start();
|
||||
this.recording = true;
|
||||
this.emit(RecordingState.Started);
|
||||
@ -205,6 +240,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
||||
|
||||
// Disconnect the source early to start shutting down resources
|
||||
this.recorderSource.disconnect();
|
||||
this.recorderWorklet.disconnect();
|
||||
await this.recorder.stop();
|
||||
|
||||
// close the context after the recorder so the recorder doesn't try to
|
||||
@ -216,7 +252,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
||||
|
||||
// Finally do our post-processing and clean up
|
||||
this.recording = false;
|
||||
this.recorderProcessor.removeEventListener("audioprocess", this.processAudioUpdate);
|
||||
await this.recorder.close();
|
||||
this.emit(RecordingState.Ended);
|
||||
|
||||
@ -240,7 +275,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
|
||||
|
||||
this.emit(RecordingState.Uploading);
|
||||
this.mxc = await this.client.uploadContent(new Blob([this.buffer], {
|
||||
type: "audio/ogg",
|
||||
type: this.contentType,
|
||||
}), {
|
||||
onlyContentUri: false, // to stop the warnings in the console
|
||||
}).then(r => r['content_uri']);
|
||||
|
37
src/voice/consts.ts
Normal file
37
src/voice/consts.ts
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
export const WORKLET_NAME = "mx-voice-worklet";
|
||||
|
||||
export enum PayloadEvent {
|
||||
Timekeep = "timekeep",
|
||||
AmplitudeMark = "amplitude_mark",
|
||||
}
|
||||
|
||||
export interface IPayload {
|
||||
ev: PayloadEvent;
|
||||
}
|
||||
|
||||
export interface ITimingPayload extends IPayload {
|
||||
ev: PayloadEvent.Timekeep;
|
||||
timeSeconds: number;
|
||||
}
|
||||
|
||||
export interface IAmplitudePayload extends IPayload {
|
||||
ev: PayloadEvent.AmplitudeMark;
|
||||
forSecond: number;
|
||||
amplitude: number;
|
||||
}
|
Loading…
Reference in New Issue
Block a user