diff --git a/src/@types/global.d.ts b/src/@types/global.d.ts
index e8f2f1bc08..f04a2ff237 100644
--- a/src/@types/global.d.ts
+++ b/src/@types/global.d.ts
@@ -52,6 +52,9 @@ declare global {
             init: () => Promise<void>;
         };
 
+        // Needed for Safari, unknown to TypeScript
+        webkitAudioContext: typeof AudioContext;
+
         mxContentMessages: ContentMessages;
         mxToastStore: ToastStore;
         mxDeviceListener: DeviceListener;
diff --git a/src/voice/Playback.ts b/src/voice/Playback.ts
index caa5241e1a..66667b9c0c 100644
--- a/src/voice/Playback.ts
+++ b/src/voice/Playback.ts
@@ -21,6 +21,7 @@ import {SimpleObservable} from "matrix-widget-api";
 import {IDestroyable} from "../utils/IDestroyable";
 import {PlaybackClock} from "./PlaybackClock";
 import {clamp} from "../utils/numbers";
+import {createAudioContext, decodeOgg} from "./compat";
 
 export enum PlaybackState {
     Decoding = "decoding",
@@ -49,7 +50,7 @@ export class Playback extends EventEmitter implements IDestroyable {
      */
     constructor(private buf: ArrayBuffer, seedWaveform = DEFAULT_WAVEFORM) {
         super();
-        this.context = new AudioContext();
+        this.context = createAudioContext();
         this.resampledWaveform = arrayFastResample(seedWaveform ?? DEFAULT_WAVEFORM, PLAYBACK_WAVEFORM_SAMPLES);
         this.waveformObservable.update(this.resampledWaveform);
         this.clock = new PlaybackClock(this.context);
@@ -91,7 +92,23 @@ export class Playback extends EventEmitter implements IDestroyable {
     }
 
     public async prepare() {
-        this.audioBuf = await this.context.decodeAudioData(this.buf);
+        // Safari compat: promise API not supported on this function
+        this.audioBuf = await new Promise<AudioBuffer>((resolve, reject) => {
+            this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
+                // This error handler is largely for Safari as well, which doesn't support Opus/Ogg
+                // very well.
+                console.error("Error decoding recording: ", e);
+                console.warn("Trying to re-encode to WAV instead...");
+
+                const wav = await decodeOgg(this.buf);
+
+                // noinspection ES6MissingAwait - not needed when using callbacks
+                this.context.decodeAudioData(wav, b => resolve(b), e => {
+                    console.error("Still failed to decode recording: ", e);
+                    reject(e);
+                });
+            });
+        });
 
         // Update the waveform to the real waveform once we have channel data to use. We don't
         // exactly trust the user-provided waveform to be accurate...
diff --git a/src/voice/VoiceRecording.ts b/src/voice/VoiceRecording.ts
index 402bd8beca..fde5779fa2 100644
--- a/src/voice/VoiceRecording.ts
+++ b/src/voice/VoiceRecording.ts
@@ -19,16 +19,17 @@ import encoderPath from 'opus-recorder/dist/encoderWorker.min.js';
 import {MatrixClient} from "matrix-js-sdk/src/client";
 import CallMediaHandler from "../CallMediaHandler";
 import {SimpleObservable} from "matrix-widget-api";
-import {clamp} from "../utils/numbers";
+import {clamp, percentageOf, percentageWithin} from "../utils/numbers";
 import EventEmitter from "events";
 import {IDestroyable} from "../utils/IDestroyable";
 import {Singleflight} from "../utils/Singleflight";
 import {PayloadEvent, WORKLET_NAME} from "./consts";
 import {UPDATE_EVENT} from "../stores/AsyncStore";
 import {Playback} from "./Playback";
+import {createAudioContext} from "./compat";
 
 const CHANNELS = 1; // stereo isn't important
-const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
+export const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
 const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
 const TARGET_MAX_LENGTH = 120; // 2 minutes in seconds. Somewhat arbitrary, though longer == larger files.
 const TARGET_WARN_TIME_LEFT = 10; // 10 seconds, also somewhat arbitrary.
@@ -55,6 +56,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
     private recorderStream: MediaStream;
     private recorderFFT: AnalyserNode;
     private recorderWorklet: AudioWorkletNode;
+    private recorderProcessor: ScriptProcessorNode;
     private buffer = new Uint8Array(0); // use this.audioBuffer to access
     private mxc: string;
     private recording = false;
@@ -98,7 +100,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
                 deviceId: CallMediaHandler.getAudioInput(),
             },
         });
-        this.recorderContext = new AudioContext({
+        this.recorderContext = createAudioContext({
             // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
         });
         this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
@@ -118,28 +120,38 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
             // noinspection ExceptionCaughtLocallyJS
             throw new Error("Unable to create recorder: no worklet script registered");
         }
-        await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
-        this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
 
         // Connect our inputs and outputs
         this.recorderSource.connect(this.recorderFFT);
-        this.recorderSource.connect(this.recorderWorklet);
-        this.recorderWorklet.connect(this.recorderContext.destination);
 
-        // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
-        this.recorderWorklet.port.onmessage = (ev) => {
-            switch (ev.data['ev']) {
-                case PayloadEvent.Timekeep:
-                    this.processAudioUpdate(ev.data['timeSeconds']);
-                    break;
-                case PayloadEvent.AmplitudeMark:
-                    // Sanity check to make sure we're adding about one sample per second
-                    if (ev.data['forSecond'] === this.amplitudes.length) {
-                        this.amplitudes.push(ev.data['amplitude']);
-                    }
-                    break;
-            }
-        };
+        if (this.recorderContext.audioWorklet) {
+            await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
+            this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
+            this.recorderSource.connect(this.recorderWorklet);
+            this.recorderWorklet.connect(this.recorderContext.destination);
+
+            // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
+            this.recorderWorklet.port.onmessage = (ev) => {
+                switch (ev.data['ev']) {
+                    case PayloadEvent.Timekeep:
+                        this.processAudioUpdate(ev.data['timeSeconds']);
+                        break;
+                    case PayloadEvent.AmplitudeMark:
+                        // Sanity check to make sure we're adding about one sample per second
+                        if (ev.data['forSecond'] === this.amplitudes.length) {
+                            this.amplitudes.push(ev.data['amplitude']);
+                        }
+                        break;
+                }
+            };
+        } else {
+            // Safari fallback: use a processor node instead, buffered to 1024 samples of data
+            // like the worklet is.
+            this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS);
+            this.recorderSource.connect(this.recorderProcessor);
+            this.recorderProcessor.connect(this.recorderContext.destination);
+            this.recorderProcessor.addEventListener("audioprocess", this.onAudioProcess);
+        }
 
         this.recorder = new Recorder({
             encoderPath, // magic from webpack
@@ -209,6 +221,13 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
         return this.mxc;
     }
 
+    private onAudioProcess = (ev: AudioProcessingEvent) => {
+        this.processAudioUpdate(ev.playbackTime);
+
+        // We skip the functionality of the worklet regarding waveform calculations: we
+        // should get that information pretty quickly from the playback info.
+    };
+
     private processAudioUpdate = (timeSeconds: number) => {
         if (!this.recording) return;
 
@@ -216,7 +235,16 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
         // size. The time domain is also known as the audio waveform. We're ignoring the
         // output of the FFT here (frequency data) because we're not interested in it.
         const data = new Float32Array(this.recorderFFT.fftSize);
-        this.recorderFFT.getFloatTimeDomainData(data);
+        if (!this.recorderFFT.getFloatTimeDomainData) {
+            // Safari compat
+            const data2 = new Uint8Array(this.recorderFFT.fftSize);
+            this.recorderFFT.getByteTimeDomainData(data2);
+            for (let i = 0; i < data2.length; i++) {
+                data[i] = percentageWithin(percentageOf(data2[i], 0, 256), -1, 1);
+            }
+        } else {
+            this.recorderFFT.getFloatTimeDomainData(data);
+        }
 
         // We can't just `Array.from()` the array because we're dealing with 32bit floats
         // and the built-in function won't consider that when converting between numbers.
@@ -287,7 +315,11 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
             // Disconnect the source early to start shutting down resources
             await this.recorder.stop(); // stop first to flush the last frame
             this.recorderSource.disconnect();
-            this.recorderWorklet.disconnect();
+            if (this.recorderWorklet) this.recorderWorklet.disconnect();
+            if (this.recorderProcessor) {
+                this.recorderProcessor.disconnect();
+                this.recorderProcessor.removeEventListener("audioprocess", this.onAudioProcess);
+            }
 
             // close the context after the recorder so the recorder doesn't try to
             // connect anything to the context (this would generate a warning)
diff --git a/src/voice/compat.ts b/src/voice/compat.ts
new file mode 100644
index 0000000000..d91af31b6c
--- /dev/null
+++ b/src/voice/compat.ts
@@ -0,0 +1,78 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import wavEncoderPath from 'opus-recorder/dist/waveWorker.min.js';
+import decoderPath from 'opus-recorder/dist/decoderWorker.min.js';
+import {SAMPLE_RATE} from "./VoiceRecording";
+// @ts-ignore - we know that this is not a module. We're looking for a path.
+import decoderWasmPath from 'opus-recorder/dist/decoderWorker.min.wasm';
+
+export function createAudioContext(opts?: AudioContextOptions): AudioContext {
+    if (window.AudioContext) {
+        return new AudioContext(opts);
+    } else if (window.webkitAudioContext) {
+        return new window.webkitAudioContext(opts);
+    } else {
+        throw new Error("Unsupported browser");
+    }
+}
+
+export function decodeOgg(audioBuffer: ArrayBuffer): Promise<ArrayBuffer> {
+    // Condensed version of the decoder example, using a promise:
+    // https://github.com/chris-rudmin/opus-recorder/blob/master/example/decoder.html
+    return new Promise((resolve) => { // no reject because the workers don't seem to have a fail path
+        console.log("Decoder WASM path: " + decoderWasmPath); // so we use the variable (avoid tree shake)
+        const typedArray = new Uint8Array(audioBuffer);
+        const decoderWorker = new Worker(decoderPath);
+        const wavWorker = new Worker(wavEncoderPath);
+
+        decoderWorker.postMessage({
+            command: 'init',
+            decoderSampleRate: SAMPLE_RATE,
+            outputBufferSampleRate: SAMPLE_RATE,
+        });
+
+        wavWorker.postMessage({
+            command: 'init',
+            wavBitDepth: 24, // standard for 48khz (SAMPLE_RATE)
+            wavSampleRate: SAMPLE_RATE,
+        });
+
+        decoderWorker.onmessage = (ev) => {
+            if (ev.data === null) { // null == done
+                wavWorker.postMessage({command: 'done'});
+                return;
+            }
+
+            wavWorker.postMessage({
+                command: 'encode',
+                buffers: ev.data,
+            }, ev.data.map(b => b.buffer));
+        };
+
+        wavWorker.onmessage = (ev) => {
+            if (ev.data.message === 'page') {
+                // The encoding comes through as a single page
+                resolve(new Blob([ev.data.page], {type: "audio/wav"}).arrayBuffer());
+            }
+        };
+
+        decoderWorker.postMessage({
+            command: 'decode',
+            pages: typedArray,
+        }, [typedArray.buffer]);
+    });
+}
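
A few hedged sketches of the compatibility techniques this diff leans on, for reviewers who want them in isolation. First, the `Playback.prepare()` change: Safari implements only the callback signature of `decodeAudioData`, so the promise the surrounding code awaits has to be assembled by hand. A minimal sketch of that wrapping, where the helper name `decodeAudioDataCompat` is illustrative and not something this diff adds:

```typescript
// Sketch only: wrap the universally-supported callback form of decodeAudioData
// in a promise, since Safari lacks the promise-returning overload.
function decodeAudioDataCompat(ctx: AudioContext, buf: ArrayBuffer): Promise<AudioBuffer> {
    return new Promise((resolve, reject) => {
        // The (buffer, successCb, errorCb) form works on every browser.
        ctx.decodeAudioData(buf, resolve, reject);
    });
}
```

`Playback.prepare()` inlines the same shape rather than using a helper, so its error callback can retry with the WAV re-encode from `decodeOgg` before giving up.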
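
The recorder takes a similar feature-detection approach: Safari exposes no `AudioContext.audioWorklet`, so the diff branches to the deprecated `ScriptProcessorNode`. A condensed sketch of that branch, assuming only standard Web Audio types; the module path and worklet name below are hypothetical stand-ins for `mxRecorderWorkletPath` and `WORKLET_NAME`:

```typescript
// Sketch: prefer an AudioWorkletNode, fall back to ScriptProcessorNode.
// The 1024 passed to createScriptProcessor is a buffer size in sample frames
// per callback, matching the worklet's effective chunking.
async function attachRecorderTap(
    ctx: AudioContext,
    source: MediaStreamAudioSourceNode,
    onTime: (seconds: number) => void,
): Promise<AudioNode> {
    if (ctx.audioWorklet) {
        await ctx.audioWorklet.addModule("recorderWorklet.js"); // hypothetical path
        const worklet = new AudioWorkletNode(ctx, "recorder-worklet"); // hypothetical name
        source.connect(worklet);
        worklet.connect(ctx.destination);
        return worklet;
    }
    const processor = ctx.createScriptProcessor(1024, 1, 1);
    source.connect(processor);
    processor.connect(ctx.destination);
    // ev.playbackTime stands in for the clock the worklet would otherwise report.
    processor.addEventListener("audioprocess", (ev: AudioProcessingEvent) => onTime(ev.playbackTime));
    return processor;
}
```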
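
On the `processAudioUpdate` change: `getByteTimeDomainData` returns unsigned bytes centred on 128 (silence), while the rest of the pipeline expects the -1..1 floats of `getFloatTimeDomainData`. The conversion below assumes `percentageOf` and `percentageWithin` from `src/utils/numbers` are plain linear maps (their bodies are not part of this diff), which makes the whole expression collapse to `b / 128 - 1`:

```typescript
// Assumed helper bodies, for illustration only.
const percentageOf = (val: number, min: number, max: number) => (val - min) / (max - min);
const percentageWithin = (pct: number, min: number, max: number) => pct * (max - min) + min;

// A byte b becomes percentageWithin(percentageOf(b, 0, 256), -1, 1) = b / 128 - 1,
// landing on the same -1..1 scale that getFloatTimeDomainData() uses.
const bytes = new Uint8Array([0, 128, 255]); // full negative, silence, near-full positive
const floats = Array.from(bytes, (b) => percentageWithin(percentageOf(b, 0, 256), -1, 1));
console.log(floats); // [-1, 0, 0.9921875]
```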
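
Finally, `decodeOgg` chains two opus-recorder workers: the Opus decoder emits `Float32Array` channel buffers, and the WAV encoder folds them into a single RIFF page. Note the transfer lists on `postMessage` (for example `[typedArray.buffer]`), which move the underlying `ArrayBuffer` to the worker instead of copying it. A hedged usage sketch built only from this diff's exports; the fetch URL is illustrative:

```typescript
import {createAudioContext, decodeOgg} from "./compat";

// Sketch: on Safari, re-encode an Ogg/Opus clip to WAV before decoding it.
async function loadRecordingForSafari(url: string): Promise<AudioBuffer> {
    const ogg = await (await fetch(url)).arrayBuffer();
    const wav = await decodeOgg(ogg); // decoder worker -> WAV encoder worker
    const ctx = createAudioContext();
    // Callback form again, for the same Safari reason as Playback.prepare().
    return new Promise((resolve, reject) => ctx.decodeAudioData(wav, resolve, reject));
}
```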