fix(connection-status): packet loss causes false positive critical alerts (#21049)

In 3.0, the packet loss metric used to trigger connection status alerts was
changed to the one generated by the `startMonitoringNetwork` method used by the
connection status modal. Since packet loss thresholds were not adjusted (0.5,
0.1, 0.2), a single lost packet causes the status alert to be permanently
stuck on "critical". This is explained by how different those metrics
are:
  - **Before (2.7):** A 5-probe wide calculation of inbound packet loss
  fraction based on `packetsLost` and `packetsReceived` metrics.
  - **Now (3.0):** An absolute counter of inbound lost packets.

This commit restores the previous packet loss metric used to trigger
connection status alerts, reverting to the original collection method via
`/utils/stats.js`. This resolves the issue, but further work is needed in
subsequent PRs:
  - Unify the collection done in `/utils/stats.js` with the
  `startMonitoringNetwork` method.
  - Incorporate the remote-inbound `fractionsLost` metric to account for packet
  loss on both legs of the network (in/out).
  - Update the packet loss metric displayed in the connection status modal to
 show a more meaningful value (e.g., packet loss percentage over a specific
  probe interval). An absolute counter of lost packets isn't useful for end
  users.
  - Update the alert log to use the fraction or percentage above
This commit is contained in:
Paulo Lanzarin 2024-08-28 17:55:57 -03:00 committed by GitHub
parent dfa0e59e5e
commit dca9b87190
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 25 additions and 9 deletions

View File

@ -1,7 +1,11 @@
import { useEffect, useRef } from 'react';
import { useMutation } from '@apollo/client';
import { UPDATE_CONNECTION_ALIVE_AT } from './mutations';
import { getStatus, startMonitoringNetwork } from '/imports/ui/components/connection-status/service';
import {
getStatus,
handleAudioStatsEvent,
startMonitoringNetwork,
} from '/imports/ui/components/connection-status/service';
import connectionStatus from '../../core/graphql/singletons/connectionStatus';
import getBaseUrl from '/imports/ui/core/utils/getBaseUrl';
@ -81,10 +85,13 @@ const ConnectionStatus = () => {
const STATS_ENABLED = window.meetingClientSettings.public.stats.enabled;
if (STATS_ENABLED) {
window.addEventListener('audiostats', handleAudioStatsEvent);
startMonitoringNetwork();
}
return () => {
window.removeEventListener('audiostats', handleAudioStatsEvent);
if (timeoutRef.current) {
clearTimeout(timeoutRef.current);
}

View File

@ -56,14 +56,25 @@ export const getStats = () => {
return STATS.level[lastLevel()];
};
export const startStatsTimeout = () => {
const STATS = window.meetingClientSettings.public.stats;
export const handleAudioStatsEvent = (event) => {
const { detail } = event;
if (statsTimeout !== null) clearTimeout(statsTimeout);
if (detail) {
const { loss } = detail;
statsTimeout = setTimeout(() => {
// setStats(-1, 'recovery', {});
}, STATS.timeout);
// The stat provided by this event is the *INBOUND* packet loss fraction
// calculated manually by using the packetsLost and packetsReceived metrics.
// It uses a 5 probe wide window - so roughly a 10 seconds period with a 2
// seconds interval between captures.
//
// This metric is DIFFERENT from the one used in the connection status modal
// (see the network data object in this file). The network data one is an
// absolute counter of INBOUND packets lost - and it should be used to determine
// alert triggers
connectionStatus.setPacketLossStatus(
getStatus(window.meetingClientSettings.public.stats.loss, loss),
);
}
};
export const sortLevel = (a, b) => {
@ -429,8 +440,6 @@ export async function startMonitoringNetwork() {
previousData = data;
connectionStatus.setNetworkData(networkData);
connectionStatus
.setPacketLossStatus(getStatus(window.meetingClientSettings.public.stats.loss, packetsLost));
}, NETWORK_MONITORING_INTERVAL_MS);
}