Merge pull request #3253 from kepstin/recording-captions

Recording captions
This commit is contained in:
Richard Alam 2016-08-02 17:32:12 -04:00 committed by GitHub
commit 9752a0651f
6 changed files with 462 additions and 133 deletions

View File

@ -125,7 +125,7 @@ module BigBlueButton
def self.exec_ret(*command)
BigBlueButton.logger.info "Executing: #{command.join(' ')}"
IO.popen([*command, :err => [:child, :out]]) do |io|
io.lines.each do |line|
io.each_line do |line|
BigBlueButton.logger.info line.chomp
end
end
@ -139,7 +139,7 @@ module BigBlueButton
IO.pipe do |r, w|
pid = spawn(*command, :out => outio, :err => w)
w.close
r.lines.each do |line|
r.each_line do |line|
BigBlueButton.logger.info line.chomp
end
Process.waitpid(pid)

View File

@ -0,0 +1,399 @@
#!/usr/bin/env python3
# This file is part of BigBlueButton.
#
# BigBlueButton is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BigBlueButton is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with BigBlueButton. If not, see <http://www.gnu.org/licenses/>.
# To install dependencies on Ubuntu:
# apt-get install python3 python3-lxml python3-pyicu
from lxml import etree
from collections import deque
from fractions import Fraction
import io
from icu import Locale, BreakIterator
import unicodedata
import html
import logging
import json
import sys
import os
import argparse
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
def webvtt_timestamp(ms):
    """Format a millisecond offset as a WebVTT timestamp (``HH:MM:SS.mmm``).

    Args:
        ms: Offset in milliseconds. Fractional values are truncated.
            Negative values are clamped to zero, because a negative offset
            cannot be expressed as a timestamp and would otherwise wrap
            around to a bogus value just below one hour.

    Returns:
        The formatted timestamp string. Hours are zero-padded to at least
        two digits and simply grow wider for offsets of 100 hours or more.
    """
    # Pure integer arithmetic: no intermediate floats, so no rounding issues.
    total_ms = max(int(ms), 0)
    total_s, frac_s = divmod(total_ms, 1000)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return '{:02}:{:02}:{:02}.{:03}'.format(h, m, s, frac_s)
class CaptionLine:
    """A single caption cue: the text to show and its display window."""

    def __init__(self):
        # Both times start at zero and are filled in by Caption.split_lines;
        # they are later formatted with webvtt_timestamp (milliseconds).
        self.start_time = self.end_time = 0
        self.text = ""
class Caption:
    """Caption text for one locale, tracked with a timestamp per character.

    ``text`` and ``timestamps`` are parallel lists with one entry per
    character. ``_del_timestamps`` is a third list that ``apply_edit`` keeps
    parallel to them: for a position where text was deleted it remembers the
    timestamp the deleted text carried, so that replacement text typed later
    can inherit it.
    """

    def __init__(self, locale):
        self.locale = locale
        self.text = list()
        self.timestamps = list()
        self._del_timestamps = list()

    def apply_edit(self, i, j, timestamp, text):
        """Replace the characters in ``[i, j)`` with ``text``.

        Args:
            i: Start index of the replaced range.
            j: End index (exclusive) of the replaced range; ``j == i`` is a
                pure insertion.
            timestamp: Time of the edit.
            text: List of characters to insert (may be empty for a deletion).
        """
        del_timestamp = None
        if j > i:
            # Keep the earliest known timestamp for text removed at this
            # position, so a correction does not make the text look newer.
            if self._del_timestamps[i] is not None:
                del_timestamp = self._del_timestamps[i]
            else:
                del_timestamp = self.timestamps[i]
            self._del_timestamps[i] = del_timestamp
            logger.debug("Removing text %s at %d:%d, del_ts: %d",
                         repr(''.join(self.text[i:j])), i, j, del_timestamp)

        if len(text) > 0:
            logger.debug("Inserting text %s at %d:%d, ts: %d",
                         repr(''.join(text)), i, j, timestamp)
            # Text inserted in front of older text must not carry a later
            # timestamp than the character that follows it.
            if i < len(self.timestamps) and timestamp > self.timestamps[i]:
                timestamp = self._del_timestamps[i]
                if timestamp is None:
                    if i > 0:
                        timestamp = self.timestamps[i-1]
                    else:
                        # At the very start there is no previous character;
                        # reuse the following character's timestamp instead
                        # of storing None, which would later break timestamp
                        # comparisons and min()/max().
                        timestamp = self.timestamps[i]
                logger.debug("Out of order timestamps, using ts: %d",
                             timestamp)

        self._del_timestamps[i:j] = [del_timestamp] * len(text)
        if i < len(self._del_timestamps):
            self._del_timestamps[i] = del_timestamp

        self.text[i:j] = text
        self.timestamps[i:j] = [timestamp] * len(text)

    def apply_record_events(self, events):
        """Remove the time spent while recording was stopped.

        Walks the ``record_status`` events in ``events``. Text whose
        timestamp falls before a recording start is replaced by a single
        newline, and timestamps of text typed while recording are shifted
        back by the accumulated length of the non-recorded gaps.

        Args:
            events: Iterable of event dicts; only those whose ``name`` is
                ``'record_status'`` are used (keys ``status``, ``timestamp``).
        """
        record = False
        ts_offset = 0
        stop_ts = 0
        start_ts = None
        stop_pos = 0
        start_pos = None

        for event in events:
            if event['name'] != 'record_status':
                continue
            status = event['status']
            timestamp = event['timestamp']

            if status and not record:
                record = True
                start_ts = timestamp
                logger.debug("Recording started at ts: %d", start_ts)

                # Find the position of the first character after recording
                # started
                start_pos = stop_pos
                while start_pos < len(self.timestamps) and \
                        self.timestamps[start_pos] < start_ts:
                    start_pos += 1

                logger.debug("Replacing characters %d:%d",
                             stop_pos, start_pos)
                self.text[stop_pos:start_pos] = ["\n"]
                self.timestamps[stop_pos:start_pos] = [stop_ts - ts_offset]

                start_pos = stop_pos + 1
                ts_offset += start_ts - stop_ts
                logger.debug("Timestamp offset now %d", ts_offset)

                stop_ts = None
                stop_pos = None

            if not status and record:
                record = False
                stop_ts = timestamp
                logger.debug("Recording stopped at ts: %d", stop_ts)

                # Find the position of the first character after recording
                # stopped, and apply ts offsets
                stop_pos = start_pos
                while stop_pos < len(self.timestamps) and \
                        self.timestamps[stop_pos] < stop_ts:
                    self.timestamps[stop_pos] -= ts_offset
                    stop_pos += 1

        if record:
            logger.debug("No recording stop, applying final ts offsets")
            while start_pos < len(self.timestamps):
                self.timestamps[start_pos] -= ts_offset
                start_pos += 1

    @classmethod
    def from_events(cls, events, apply_record_events=True):
        """Build one ``Caption`` per locale from a sequence of events.

        Args:
            events: Iterable of event dicts. It is iterated once for the
                ``edit_caption_history`` events and again per locale when
                ``apply_record_events`` is true, so it must be re-iterable.
            apply_record_events: When true, also run
                ``apply_record_events`` on every generated caption.

        Returns:
            Dict mapping locale to ``Caption``.
        """
        captions = {}

        # Apply all of the caption events to generate the full text
        # with per-character timestamps
        for event in events:
            if event['name'] != 'edit_caption_history':
                continue
            locale = event['locale']
            caption = captions.get(locale)
            if caption is None:
                logger.info("Started caption stream for locale '%s'",
                            locale)
                captions[locale] = caption = cls(locale)
            caption.apply_edit(event['start_index'], event['end_index'],
                               event['timestamp'], event['text'])

        if apply_record_events:
            for locale, caption in captions.items():
                logger.info("Applying recording events to locale '%s'",
                            locale)
                caption.apply_record_events(events)

        logger.info("Generated %d caption stream(s)", len(captions))
        return captions

    def _line_section(self, line_start, line_end, next_break):
        """Return ``(text, start_time, end_time)`` for a candidate line.

        The text covers ``[line_start, line_end)`` and is NFC-normalized; the
        times are the min/max timestamps over ``[line_start, next_break)``.
        """
        text_section = unicodedata.normalize(
            'NFC', "".join(self.text[line_start:line_end]))
        timestamps_section = self.timestamps[line_start:next_break]
        return text_section, min(timestamps_section), max(timestamps_section)

    def split_lines(self, max_length=32):
        """Word-wrap the caption text into ``CaptionLine`` objects.

        Break opportunities come from an ICU line ``BreakIterator`` created
        for this caption's locale.

        Args:
            max_length: Maximum length of a line, measured on the
                NFC-normalized text with trailing whitespace removed.

        Returns:
            List of ``CaptionLine`` (lines may have empty text).
        """
        lines = list()

        str_text = "".join(self.text)

        locale = Locale(self.locale)
        logger.debug("Using locale %s for word-wrapping",
                     locale.getDisplayName(locale))

        break_iter = BreakIterator.createLineInstance(locale)
        break_iter.setText(str_text)

        line = CaptionLine()
        line_start = 0
        prev_break = 0
        next_break = break_iter.following(prev_break)

        # Super simple "greedy" line break algorithm.
        while prev_break < len(self.text):
            status = break_iter.getRuleStatus()

            # Exclude trailing whitespace, control characters and
            # non-spacing marks from the visible line text.
            line_end = next_break
            while line_end > line_start and (
                    self.text[line_end-1].isspace() or
                    unicodedata.category(self.text[line_end-1])
                    in ['Cc', 'Mn']):
                line_end -= 1

            do_break = False

            text_section, start_time, end_time = self._line_section(
                line_start, line_end, next_break)

            if len(text_section) > max_length:
                if prev_break == line_start:
                    # Over-long string. Just chop it into bits
                    line_end = next_break = prev_break + max_length
                    # Recompute for the chopped range; otherwise the whole
                    # over-long run would be stored on the line and its tail
                    # repeated on the next line.
                    text_section, start_time, end_time = self._line_section(
                        line_start, line_end, next_break)
                else:
                    # Adding this segment overflows: break at the previous
                    # opportunity and retry the segment on a new line.
                    next_break = prev_break
                    do_break = True
            elif next_break >= len(self.text) or 100 <= status < 200:
                # Status [100,200) indicates a required (hard) line break
                line.text = text_section
                line.start_time = start_time
                line.end_time = end_time
                do_break = True

            if do_break:
                logger.debug("text section %d -> %d (%d): %s",
                             line.start_time, line.end_time,
                             len(line.text), repr(line.text))
                lines.append(line)
                line = CaptionLine()
                line_start = next_break
            else:
                line.text = text_section
                line.start_time = start_time
                line.end_time = end_time

            prev_break = next_break
            next_break = break_iter.following(prev_break)

        return lines

    def write_webvtt(self, f):
        """Write the caption as a WebVTT document.

        Args:
            f: Writable binary file object; all output is UTF-8 encoded
                bytes.
        """
        # Write magic
        f.write("WEBVTT\n\n".encode('utf-8'))

        lines = self.split_lines()
        for i, line in enumerate(lines):
            # Don't generate a cue for empty lines
            if len(line.text) == 0:
                continue

            start_time = line.start_time
            end_time = line.end_time

            if i + 1 < len(lines):
                next_start_time = lines[i + 1].start_time
                # If the next line is close after the current line, make the
                # timestamps continuous so the subtitle doesn't "blink"
                if next_start_time - end_time < 1000:
                    end_time = next_start_time

            # Apply some duration cleanup heuristics to give some reasonable
            # line durations
            duration = end_time - start_time
            # Make lines go away if they've been showing for >16 seconds
            if duration > 16000:
                duration = 16000
            # A minimum per-character time for display (up to 3.2s for 32char)
            if duration < 100 * len(line.text):
                duration = 100 * len(line.text)
            # Never show a caption (even a short one) for less than 1s
            if duration < 1000:
                duration = 1000
            end_time = start_time + duration

            f.write("{} --> {}\n".format(
                webvtt_timestamp(start_time),
                webvtt_timestamp(end_time)
            ).encode('utf-8'))
            # Escape &, < and > so the text is not parsed as cue markup.
            f.write(html.escape(line.text, quote=False).encode('utf-8'))
            f.write("\n\n".encode('utf-8'))

    def caption_desc(self):
        """Return the index entry for this caption stream.

        Returns:
            Dict with ``locale`` (the locale code) and ``localeName`` (the
            locale's display name as reported by ICU).
        """
        locale = Locale(self.locale)
        return {
            "locale": self.locale,
            "localeName": locale.getDisplayName(locale)
        }
def parse_record_status(event, element):
    """Fill ``event`` in place from a RecordStatusEvent XML element.

    Reads the ``userId`` and ``status`` child elements and sets the
    ``name``, ``user_id`` and ``status`` keys of ``event``.
    """
    user_node, status_node = (
        element.find(tag) for tag in ('userId', 'status'))
    event['name'] = 'record_status'
    event['user_id'] = user_node.text
    # Only the literal text 'true' counts as "recording on".
    event['status'] = status_node.text == 'true'
def parse_caption_edit(event, element):
    """Fill ``event`` in place from an EditCaptionHistoryEvent XML element.

    Sets ``name``, ``locale_name``, ``locale``, ``text`` (a list of single
    characters) and the integer ``start_index`` / ``end_index`` keys.
    """
    name_node, text_node, start_node, end_node, code_node = (
        element.find(tag)
        for tag in ('locale', 'text', 'startIndex', 'endIndex', 'localeCode'))

    event['name'] = 'edit_caption_history'
    event['locale_name'] = name_node.text
    # Fallback for missing 'localeCode'
    event['locale'] = "en" if code_node is None else code_node.text
    # An empty <text> element has no text at all; treat it as no characters.
    event['text'] = list(text_node.text or "")
    event['start_index'] = int(start_node.text)
    event['end_index'] = int(end_node.text)
def parse_events(directory="."):
    """Read the caption-related events from ``<directory>/events.xml``.

    Only ``RecordStatusEvent`` and ``EditCaptionHistoryEvent`` elements from
    the ``CAPTION`` and ``PARTICIPANT`` modules are kept. If the file
    contains no record status event at all, a synthetic "recording started"
    event at timestamp 0 is put at the front of the result.

    Args:
        directory: Directory containing ``events.xml``.

    Returns:
        A ``deque`` of event dicts in file order. Each has ``name`` and
        ``timestamp`` plus the keys added by ``parse_record_status`` or
        ``parse_caption_edit``.
    """
    start_time = None
    have_record_events = False

    events = deque()
    with open(os.path.join(directory, "events.xml"), "rb") as f:
        for _, element in etree.iterparse(f, tag="event"):
            try:
                # Make timestamps relative to the first event in the file.
                # The unit is whatever the XML attribute uses; the rest of
                # this script treats the values as milliseconds.
                timestamp = int(element.attrib['timestamp'])
                # Compare against None: a first timestamp of 0 is a valid
                # reference point and must not be replaced by a later one.
                if start_time is None:
                    start_time = timestamp
                timestamp -= start_time

                # Only need events from these modules
                if element.attrib['module'] not in ('CAPTION', 'PARTICIPANT'):
                    continue

                event = {}
                event['name'] = name = element.attrib['eventname']
                event['timestamp'] = timestamp

                if name == 'RecordStatusEvent':
                    parse_record_status(event, element)
                    have_record_events = True
                elif name == 'EditCaptionHistoryEvent':
                    parse_caption_edit(event, element)
                else:
                    logger.debug("Unhandled event: %s", name)
                    continue

                events.append(event)
            finally:
                # Drop the element's content once it has been processed.
                element.clear()

    if not have_record_events:
        # Add a fake record start event to the events list
        events.appendleft({
            'name': 'record_status',
            'user_id': None,
            'timestamp': 0,
            'status': True
        })

    return events
if __name__ == "__main__":
    # Command-line entry point: read events.xml from the input directory and
    # write one WebVTT file per caption locale, plus a captions.json index,
    # into the output directory.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Generate WebVTT files from BigBlueButton captions")
    cli.add_argument("-i", "--input", metavar="PATH", default=os.curdir,
                     help="input directory with events.xml file")
    cli.add_argument("-o", "--output", metavar="PATH", default=os.curdir,
                     help="output directory")
    options = cli.parse_args()

    logger.info("Reading recording events file")
    recorded_events = parse_events(options.input)

    logger.info("Generating caption data from recording events")
    streams = Caption.from_events(recorded_events)

    # One caption_<locale>.vtt file per caption stream.
    for locale_code, stream in streams.items():
        vtt_path = os.path.join(options.output,
                                "caption_{}.vtt".format(locale_code))
        logger.info("Writing captions for locale %s to %s",
                    locale_code, vtt_path)
        with open(vtt_path, "wb") as vtt_file:
            stream.write_webvtt(vtt_file)

    # The index lists every generated stream (locale code and display name).
    index_path = os.path.join(options.output, "captions.json")
    logger.info("Writing captions index file to %s", index_path)
    index_entries = [stream.caption_desc() for stream in streams.values()]
    with open(index_path, "w") as index_file:
        json.dump(index_entries, index_file)

View File

@ -271,11 +271,6 @@
acorn.$seek.slider('value', currenttime);
}
}
// If captions are active, update them
if(captionsActive) {
updateCaption();
}
};
/*
@ -692,33 +687,14 @@
var captionRadioName = 'acornCaptions' + uniqueID();
var captionOff = function() {
captions = '';
acorn.$caption.hide();
activeCaptions = false;
acorn.$transcriptBtn.removeClass(transcriptBtnActiveClass).hide();
acorn.$transcript.hide();
for (var i = 0; i < acorn.$track.length; i++) {
var track = acorn.$track[i];
track.track.mode = "disabled";
}
acorn.$captionBtn.removeClass(captionBtnActiveClass);
};
/*
* Update caption based on "currentTime"
* Borrowed and adapted from Bruce Lawson's Accessible HTML5 Video with JavaScripted captions
* http://dev.opera.com/articles/view/accessible-html5-video-with-javascripted-captions/
*/
var updateCaption = function() {
var now = acorn.$self[0].currentTime; // how soon is now?
var text = "";
for (var i = 0; i < captions.length; i++) {
if (now >= captions[i].start && now <= captions[i].end) {
text = captions[i].content; // yes? then load it into a variable called text
break;
}
}
acorn.$caption.html(text); // and put contents of text into caption div
};
/*
* Initialize the Caption Selector
* Used when multiple <track>s are present
@ -792,54 +768,33 @@
* Takes the url as a parameter
*/
var loadCaption = function(url) {
// add a loading class to the Caption Button when starting to load the caption
acorn.$captionBtn.addClass(captionBtnLoadingClass);
// make an AJAX request to load the file
$.ajax({
url: url,
success: function(data) {
/*
* On success use a SRT parser on the loaded data
* Using JavaScript SRT parser by Silvia Pfeiffer <silvia@siliva-pfeiffer.de>
* parseSrt included at the end of this file
*/
captions = parseSrt(data);
// Iterate through the available captions, and disable all but the selected one
for (var i = 0; i < acorn.$track.length; i++) {
var track = acorn.$track[i];
if (track.getAttribute('src') == url) {
track.track.mode = "showing";
// TODO transcript markup?
// show the Transcript Button
acorn.$transcriptBtn.show();
//acorn.$transcriptBtn.show();
/*
* Generate the markup for the transcript
* Markup based on Bruce Lawson's Accessible HTML5 Video with JavaScripted captions
* http://dev.opera.com/articles/view/accessible-html5-video-with-javascripted-captions/
*/
var transcriptText = '';
$(captions).each(function() {
transcriptText += '<span data-begin="' + parseInt(this.start, 10) + '" data-end=' + parseInt(this.end, 10) + '>' + this.content.replace("'","") + '</span>';
});
//var transcriptText = '';
//$(captions).each(function() {
// transcriptText += '<span data-begin="' + parseInt(this.start, 10) + '" data-end=' + parseInt(this.end, 10) + '>' + this.content.replace("'","") + '</span>';
//});
// append the generated markup
acorn.$transcript.html(transcriptText);
// show caption
acorn.$caption.show();
//acorn.$transcript.html(transcriptText);
} else {
track.track.mode = "disabled";
}
}
captionsActive = true;
// in case the media is paused and timeUpdate is not triggered, trigger it
if(acorn.$self.prop('paused')) {
updateCaption();
}
acorn.$captionBtn.addClass(captionBtnActiveClass).removeClass(captionBtnLoadingClass);
},
error: function() {
// if an error occurs while loading the caption, turn captions off
captionOff();
// if a console is available, log error
if(console) {
console.log('Error loading captions');
}
}
});
acorn.$captionBtn.addClass(captionBtnActiveClass);
};
/*
@ -858,6 +813,11 @@
* Caption loading and initialization
*/
var initCaption = function() {
// Check if we have browser support for captions
if (typeof(TextTrack) === "undefined") {
return;
}
// get all <track> elements
acorn.$track = $('track', acorn.$self);
@ -919,7 +879,6 @@
} else {
loadCaption(tracksrc);
}
$(this).toggleClass(captionBtnActiveClass);
});
// load default caption if captionsOn is true
@ -1002,66 +961,3 @@
};
})(jQuery);
/*
* parseSrt function
* JavaScript SRT parser by Silvia Pfeiffer <silvia@siliva-pfeiffer.de>
* http://silvia-pfeiffer.de/
*
* Tri-licensed under MPL 1.1/GPL 2.0/LGPL 2.1
* http://www.gnu.org/licenses/gpl.html
* http://www.gnu.org/licenses/lgpl.html
* http://www.mozilla.org/MPL/
*
* The Initial Developer of the Original Code is Mozilla Corporation.
* Portions created by the Initial Developer are Copyright (C) 2009
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Silvia Pfeiffer <silvia@siliva-pfeiffer.de>
*
*
*/
/*
 * Parse SRT subtitle text into an array of cues.
 *
 * data: the SRT file contents as a string.
 * Returns an array of {start, end, content} objects, where start and end
 * are offsets in seconds and content is the cue text with its lines joined
 * by "<br>". Blocks that are not well-formed cues are skipped.
 */
function parseSrt(data) {
    // Convert h/m/s/ms capture groups to seconds. The millisecond group is
    // optional in the pattern below, so a missing group counts as zero
    // instead of turning the whole time into NaN.
    var toSeconds = function(h, m, s, ms) {
        return (parseInt(h, 10) * 60 * 60) +
            (parseInt(m, 10) * 60) +
            (parseInt(s, 10)) +
            (ms === undefined ? 0 : parseInt(ms, 10) / 1000);
    };

    var srt = data.replace(/\r+/g, ''); // remove dos newlines
    srt = srt.replace(/^\s+|\s+$/g, ''); // trim white space start and end
    srt = srt.replace(/<[a-zA-Z\/][^>]*>/g, ''); // remove all html tags for security reasons

    // get captions
    var captions = [];
    var caplist = srt.split('\n\n');
    for (var i = 0; i < caplist.length; i = i + 1) {
        var s = caplist[i].split(/\n/);

        // A cue needs at least a number line and a time line; anything else
        // is a file format error or a comment block.
        if (s.length < 2 || !/^\d+$/.test(s[0]) || !/\d+:\d+:\d+/.test(s[1])) {
            continue;
        }

        // ignore caption number in s[0]
        // parse time string
        var m = s[1].match(/(\d+):(\d+):(\d+)(?:,(\d+))?\s*--?>\s*(\d+):(\d+):(\d+)(?:,(\d+))?/);
        if (!m) {
            // Unrecognized timestring
            continue;
        }

        captions.push({
            start: toSeconds(m[1], m[2], m[3], m[4]),
            end: toSeconds(m[5], m[6], m[7], m[8]),
            // concatenate text lines to html text
            content: s.slice(2).join("<br>")
        });
    }
    return captions;
}

View File

@ -310,6 +310,25 @@ load_video = function(){
webmsource.setAttribute('type','video/webm; codecs="vp8.0, vorbis"');
video.appendChild(webmsource);
// Try to load the captions.
// captions.json is the index written by the gen_webvtt script: an array of
// objects with a 'locale' code and a 'localeName' display name. For each
// entry a <track> element pointing at the matching caption_<locale>.vtt
// file is added to the video element.
// TODO this all should be done asynchronously...
var capReq = new XMLHttpRequest();
// Synchronous request: send() only returns once the response is complete.
capReq.open('GET', RECORDINGS + '/captions.json', /*async=*/false);
capReq.send();
// Any status other than 200 means no tracks are added.
if (capReq.status == 200) {
console.log("==Loading closed captions");
// With sync request, responseType should always be blank (=="text")
var captions = JSON.parse(capReq.responseText);
for (var i = 0; i < captions.length; i++) {
var track = document.createElement("track");
track.setAttribute('kind', 'captions');
// Human-readable name for the track
track.setAttribute('label', captions[i]['localeName']);
track.setAttribute('srclang', captions[i]['locale']);
track.setAttribute('src', RECORDINGS + '/caption_' + captions[i]['locale'] + '.vtt');
video.appendChild(track);
}
}
/*var time_manager = Popcorn("#video");
var pc_webcam = Popcorn("#webcam");
time_manager.on( "timeupdate", function() {

View File

@ -182,6 +182,12 @@ if not FileTest.directory?(target_dir)
BigBlueButton.process_multiple_videos(target_dir, temp_dir, meeting_id, width, height, presentation_props['audio_offset'], presentation_props['include_deskshare'])
end
# Turn the recorded caption events into WebVTT files with the bundled
# gen_webvtt utility (reads from raw_archive_dir, writes to target_dir).
# A non-zero result from exec_ret (presumably the command's exit status)
# aborts processing.
BigBlueButton.logger.info("Generating closed captions")
ret = BigBlueButton.exec_ret('utils/gen_webvtt', '-i', raw_archive_dir, '-o', target_dir)
raise "Generating closed caption files failed" if ret != 0
process_done = File.new("#{recording_dir}/status/processed/#{meeting_id}-presentation.done", "w")
process_done.write("Processed #{meeting_id}")
process_done.close

View File

@ -960,6 +960,15 @@ begin
BigBlueButton.logger.info("Copied audio.ogg file")
end
# Publish the caption index together with every per-locale WebVTT file, but
# only when the processing step produced a captions index.
captions_index = "#{$process_dir}/captions.json"
if File.exist?(captions_index)
  BigBlueButton.logger.info("Copying caption files")
  FileUtils.cp(captions_index, package_dir)
  Dir.glob("#{$process_dir}/caption_*.vtt").each do |vtt_path|
    BigBlueButton.logger.debug(vtt_path)
    FileUtils.cp(vtt_path, package_dir)
  end
end
processing_time = File.read("#{$process_dir}/processing_time")