feat: add experimental support for ICE restart

We currently use full renegotiation for audio, video, and screen sharing
reconnections, which involves re-creating transports and signaling channels
from scratch. While effective in some scenarios, this approach is slow and,
especially with outbound cameras and screen sharing, prone to failures.

To counter that, WebRTC provides a mechanism to restart ICE without needing
to re-create the peer connection. This allows us to avoid full renegotiation
and bypass some server-side signaling limitations. Implementing ICE restart
should make outbound camera/screen sharing reconnections more reliable and
faster.

This commit implements the ICE restart procedure for all WebRTC components,
based on bbb-webrtc-sfu >= v2.15.0-beta.0, which added support for ICE restart
requests. This feature is off by default. To enable it, adjust the following
flags:

- `/etc/bigbluebutton/bbb-webrtc-sfu/production.yml`: `allowIceRestart: true`
- `/etc/bigbluebutton/bbb-html5.yml`: `public.kurento.restartIce`
  * Refer to the inline documentation; this can be enabled on the client side
    per media type.
  * Note: The default max retries for audio is lower than for cameras/screen
    sharing (1 vs 3). This is because the full renegotiation process for audio
    is more reliable, so ICE restart is attempted first, followed by full
    renegotiation if necessary. This approach is less suitable for cameras/
    screen sharing, where longer retry periods for ICE restart make sense
    since full renegotiation there is... iffy.
This commit is contained in:
prlanzarin 2024-08-22 16:35:08 -03:00
parent d47c19c4a7
commit d2dde8a9b1
8 changed files with 267 additions and 11 deletions

View File

@ -32,6 +32,10 @@ const TRANSPARENT_LISTEN_ONLY = MEDIA.transparentListenOnly;
const MEDIA_TAG = MEDIA.mediaTag.replace(/#/g, '');
const CONNECTION_TIMEOUT_MS = MEDIA.listenOnlyCallTimeout || 15000;
const { audio: NETWORK_PRIORITY } = MEDIA.networkPriorities || {};
const {
enabled: RESTART_ICE = false,
retries: RESTART_ICE_RETRIES = 1,
} = Meteor.settings.public.kurento?.restartIce?.audio || {};
const SENDRECV_ROLE = 'sendrecv';
const RECV_ROLE = 'recv';
const BRIDGE_NAME = 'fullaudio';
@ -346,6 +350,8 @@ export default class SFUAudioBridge extends BaseAudioBridge {
mediaStreamFactory: this.mediaStreamFactory,
gatheringTimeout: GATHERING_TIMEOUT,
transparentListenOnly: isTransparentListenOnlyEnabled(),
restartIce: RESTART_ICE,
restartIceMaxRetries: RESTART_ICE_RETRIES,
};
this.broker = new AudioBroker(

View File

@ -15,6 +15,10 @@ const SIGNAL_CANDIDATES = Meteor.settings.public.kurento.signalCandidates;
const TRACE_LOGS = Meteor.settings.public.kurento.traceLogs;
const { screenshare: NETWORK_PRIORITY } = Meteor.settings.public.media.networkPriorities || {};
const GATHERING_TIMEOUT = Meteor.settings.public.kurento.gatheringTimeout;
const {
enabled: RESTART_ICE = false,
retries: RESTART_ICE_RETRIES = 3,
} = Meteor.settings.public.kurento?.restartIce?.screenshare || {};
const BRIDGE_NAME = 'kurento'
const SCREENSHARE_VIDEO_TAG = 'screenshareVideo';
@ -269,6 +273,8 @@ export default class KurentoScreenshareBridge {
forceRelay: shouldForceRelay(),
traceLogs: TRACE_LOGS,
gatheringTimeout: GATHERING_TIMEOUT,
restartIce: RESTART_ICE,
restartIceMaxRetries: RESTART_ICE_RETRIES,
};
this.broker = new ScreenshareBroker(
@ -341,6 +347,7 @@ export default class KurentoScreenshareBridge {
traceLogs: TRACE_LOGS,
networkPriority: NETWORK_PRIORITY,
gatheringTimeout: GATHERING_TIMEOUT,
restartIce: RESTART_ICE,
};
this.broker = new ScreenshareBroker(

View File

@ -48,6 +48,10 @@ const {
const SIGNAL_CANDIDATES = Meteor.settings.public.kurento.signalCandidates;
const TRACE_LOGS = Meteor.settings.public.kurento.traceLogs;
const GATHERING_TIMEOUT = Meteor.settings.public.kurento.gatheringTimeout;
const {
enabled: RESTART_ICE = false,
retries: RESTART_ICE_RETRIES = 3,
} = Meteor.settings.public.kurento?.restartIce?.video || {};
const intlClientErrors = defineMessages({
permissionError: {
@ -324,6 +328,10 @@ class VideoProvider extends Component {
this.handleIceCandidate(parsedMessage);
break;
case 'restartIceResponse':
this.handleRestartIceResponse(parsedMessage);
break;
case 'pong':
break;
@ -500,6 +508,36 @@ class VideoProvider extends Component {
this.sendMessage(message);
}
requestRestartIce(peer, stream) {
if (peer == null) {
throw new Error('No peer to restart ICE');
}
if (peer.vpRestartIceRetries >= RESTART_ICE_RETRIES) {
throw new Error('Max ICE restart retries reached');
}
const role = VideoService.getRole(peer.isPublisher);
const message = {
id: 'restartIce',
type: 'video',
cameraId: stream,
role,
};
// eslint-disable-next-line no-param-reassign
peer.vpRestartIceRetries += 1;
logger.warn({
logCode: 'video_provider_restart_ice',
extraInfo: {
cameraId: stream,
role,
restartIceRetries: peer.vpRestartIceRetries,
},
}, `Requesting ICE restart (${peer.vpRestartIceRetries}/${RESTART_ICE_RETRIES})`);
this.sendMessage(message);
}
startResponse(message) {
const { cameraId: stream, role } = message;
const peer = this.webRtcPeers[stream];
@ -565,6 +603,36 @@ class VideoProvider extends Component {
}
}
handleRestartIceResponse(message) {
const { cameraId: stream, sdp } = message;
const peer = this.webRtcPeers[stream];
if (peer) {
peer?.restartIce(sdp, peer?.isPublisher)
.catch((error) => {
const { peerConnection } = peer;
if (peerConnection) peerConnection.onconnectionstatechange = null;
logger.error({
logCode: 'video_provider_restart_ice_error',
extraInfo: {
errorMessage: error?.message,
errorCode: error?.code,
errorName: error?.name,
cameraId: stream,
role: VideoService.getRole(peer?.isPublisher),
},
}, `ICE restart failed for camera ${stream}`);
this._onWebRTCError(
new Error('iceConnectionStateError'),
stream,
VideoService.isLocalStream(stream),
);
});
}
}
clearRestartTimers(stream) {
if (this.restartTimeout[stream]) {
clearTimeout(this.restartTimeout[stream]);
@ -667,6 +735,7 @@ class VideoProvider extends Component {
peer.isPublisher = true;
peer.originalProfileId = profileId;
peer.currentProfileId = profileId;
peer.vpRestartIceRetries = 0;
peer.start();
peer.generateOffer().then((offer) => {
// Store the media stream if necessary. The scenario here is one where
@ -978,16 +1047,13 @@ class VideoProvider extends Component {
}
_handleIceConnectionStateChange(stream, isLocal) {
const { intl } = this.props;
const peer = this.webRtcPeers[stream];
const role = VideoService.getRole(isLocal);
if (peer && peer.peerConnection) {
const pc = peer.peerConnection;
const connectionState = pc.connectionState;
notifyStreamStateChange(stream, connectionState);
if (connectionState === 'failed' || connectionState === 'closed') {
const { connectionState } = pc;
const handleFatalFailure = () => {
const error = new Error('iceConnectionStateError');
// prevent the same error from being detected multiple times
pc.onconnectionstatechange = null;
@ -1002,6 +1068,45 @@ class VideoProvider extends Component {
}, `Camera ICE connection state changed: ${connectionState}. Role: ${role}.`);
this._onWebRTCError(error, stream, isLocal);
};
notifyStreamStateChange(stream, connectionState);
switch (connectionState) {
case 'closed':
handleFatalFailure();
break;
case 'failed':
if (!RESTART_ICE) {
handleFatalFailure();
} else {
try {
this.requestRestartIce(peer, stream);
} catch (error) {
handleFatalFailure();
}
}
break;
case 'connected':
if (peer && peer?.vpRestartIceRetries > 0) {
logger.info({
logCode: 'video_provider_ice_restarted',
extraInfo: {
cameraId: stream,
role: VideoService.getRole(peer?.isPublisher),
restartIceRetries: peer?.vpRestartIceRetries,
},
}, 'ICE restart successful');
peer.vpRestartIceRetries = 0;
}
break;
default:
break;
}
} else {
logger.error({

View File

@ -151,6 +151,9 @@ class AudioBroker extends BaseBroker {
this.onstart(parsedMessage.success);
this.started = true;
break;
case 'restartIceResponse':
this.handleRestartIceResponse(parsedMessage);
break;
case 'webRTCAudioError':
case 'error':
this.handleSFUError(parsedMessage);

View File

@ -41,6 +41,8 @@ class ScreenshareBroker extends BaseBroker {
// traceLogs
// networkPriority
// gatheringTimeout
// restartIce
// restartIceMaxRetries
Object.assign(this, options);
}
@ -97,6 +99,9 @@ class ScreenshareBroker extends BaseBroker {
case 'iceCandidate':
this.handleIceCandidate(parsedMessage.candidate);
break;
case 'restartIceResponse':
this.handleRestartIceResponse(parsedMessage);
break;
case 'error':
this.handleSFUError(parsedMessage);
break;

View File

@ -6,6 +6,7 @@ const WS_HEARTBEAT_OPTS = {
interval: 15000,
delay: 3000,
};
const ICE_RESTART = 'restartIce';
class BaseBroker {
static assembleError(code, reason) {
@ -29,6 +30,9 @@ class BaseBroker {
this.signallingTransportOpen = false;
this.logCodePrefix = `${this.sfuComponent}_broker`;
this.peerConfiguration = {};
this.restartIce = false;
this.restartIceMaxRetries = 3;
this._restartIceRetries = 0;
this.onbeforeunload = this.onbeforeunload.bind(this);
this._onWSError = this._onWSError.bind(this);
@ -277,18 +281,47 @@ class BaseBroker {
// Reacts to the peer connection's current `connectionState`: forwards it to
// notifyStreamStateChange (when an eventIdentifier is given) and then either
// requests an ICE restart or reports a fatal failure.
// Does nothing when there is no webRtcPeer.
handleConnectionStateChange (eventIdentifier) {
if (this.webRtcPeer) {
const { peerConnection } = this.webRtcPeer;
const connectionState = peerConnection.connectionState;
if (eventIdentifier) {
notifyStreamStateChange(eventIdentifier, connectionState);
}
if (connectionState === 'failed' || connectionState === 'closed') {
const { connectionState } = peerConnection;
// Detaches the state-change listener (so the failure is reported once) and
// reports error 1307 through onerror.
const handleFatalFailure = () => {
if (this.webRtcPeer?.peerConnection) {
this.webRtcPeer.peerConnection.onconnectionstatechange = null;
}
// 1307: "ICE_STATE_FAILED",
const error = BaseBroker.assembleError(1307);
this.onerror(error);
};
if (eventIdentifier) notifyStreamStateChange(eventIdentifier, connectionState);
switch (connectionState) {
// 'closed' is always treated as fatal.
case 'closed':
handleFatalFailure();
break;
// 'failed': fatal when ICE restart is disabled; otherwise request a restart
// and fall back to the fatal path if requestRestartIce throws.
case 'failed':
if (!this.restartIce) {
handleFatalFailure();
} else {
try {
this.requestRestartIce();
} catch (error) {
handleFatalFailure();
}
}
break;
// 'connected' after at least one restart request: reset the retry counter
// and log that the restart succeeded.
case 'connected':
if (this._restartIceRetries > 0) {
this._restartIceRetries = 0;
logger.info({
logCode: `${this.logCodePrefix}_ice_restarted`,
extraInfo: { sfuComponent: this.sfuComponent },
}, 'ICE restart successful');
}
break;
default:
break;
}
}
}
@ -333,6 +366,52 @@ class BaseBroker {
}
}
// Sends a message to the SFU to restart ICE
requestRestartIce() {
if (this._restartIceRetries >= this.restartIceMaxRetries) {
throw new Error('Max ICE restart retries reached');
}
const message = {
id: ICE_RESTART,
type: this.sfuComponent,
role: this.role,
};
this._restartIceRetries += 1;
logger.warn({
logCode: `${this.logCodePrefix}_restart_ice`,
extraInfo: {
sfuComponent: this.sfuComponent,
retries: this._restartIceRetries,
},
}, `Requesting ICE restart (${this._restartIceRetries}/${this.restartIceMaxRetries})`);
this.sendMessage(message);
}
handleRestartIceResponse({ sdp }) {
if (this.webRtcPeer) {
this.webRtcPeer.restartIce(sdp, this.offering).catch((error) => {
logger.error({
logCode: `${this.logCodePrefix}_restart_ice_error`,
extraInfo: {
errorMessage: error?.message,
errorCode: error?.code,
errorName: error?.name,
sfuComponent: this.sfuComponent,
},
}, 'ICE restart failed');
if (this.webRtcPeer?.peerConnection) {
this.webRtcPeer.peerConnection.onconnectionstatechange = null;
}
// 1307: "ICE_STATE_FAILED",
this.onerror(BaseBroker.assembleError(1307));
});
}
}
disposePeer () {
if (this.webRtcPeer) {
this.webRtcPeer.dispose();

View File

@ -440,6 +440,42 @@ export default class WebRtcPeer extends EventEmitter2 {
});
}
restartIce(remoteSdp, initiator) {
if (this.isPeerConnectionClosed()) {
this.logger.error('BBB::WebRtcPeer::restartIce - peer connection closed');
throw new Error('Peer connection is closed');
}
const sdp = new RTCSessionDescription({
type: initiator ? 'offer' : 'answer',
sdp: remoteSdp,
});
this.logger.debug('BBB::WebRtcPeer::restartIce - setting remote description', sdp);
// If this peer was the original initiator, process remote first
if (initiator) {
return this.peerConnection.setRemoteDescription(sdp)
.then(() => this.peerConnection.createAnswer())
.then((answer) => this.peerConnection.setLocalDescription(answer))
.then(() => {
const localDescription = this.getLocalSessionDescriptor();
this.logger.debug('BBB::WebRtcPeer::restartIce - local description set', localDescription.sdp);
return localDescription.sdp;
});
}
// not the initiator - need to create offer first
return this.peerConnection.createOffer({ iceRestart: true })
.then((newOffer) => this.peerConnection.setLocalDescription(newOffer))
.then(() => {
const localDescription = this.getLocalSessionDescriptor();
this.logger.debug('BBB::WebRtcPeer::restartIce - local description set', localDescription.sdp);
return localDescription.sdp;
})
.then(() => this.peerConnection.setRemoteDescription(sdp));
}
dispose() {
this.logger.debug('BBB::WebRtcPeer::dispose');

View File

@ -319,6 +319,21 @@ public:
# Controls whether ICE candidates should be signaled to bbb-webrtc-sfu.
# Enable this if you want to use Kurento as the media server.
signalCandidates: false
# restartIce: controls whether ICE restarts should be signaled to bbb-webrtc-sfu
# whenever peers of the selected type (audio, video, screenshare) transition
# to failure states. Disabled by default (experimental).
# restartIce.<mediaType>.retries: number of ICE restart retries before giving up
# (i.e.: throwing an error). Default is 1 for audio, 3 for video and screenshare.
restartIce:
audio:
enabled: false
retries: 1
video:
enabled: false
retries: 3
screenshare:
enabled: false
retries: 3
# traceLogs: <Boolean> - enable trace logs in SFU peers
traceLogs: false
cameraTimeouts: