Merge pull request #3253 from kepstin/recording-captions

Recording captions
2016-08-02 17:32:12 -04:00 · 2016-08-02 17:32:12 -04:00 · 9752a0651f
commit 9752a0651f
parent 6f1c96b3ff a280a0d0ef
6 changed files with 462 additions and 133 deletions
--- a/record-and-playback/core/lib/recordandplayback.rb
+++ b/record-and-playback/core/lib/recordandplayback.rb
@ -125,7 +125,7 @@ module BigBlueButton
  def self.exec_ret(*command)
    BigBlueButton.logger.info "Executing: #{command.join(' ')}"
    IO.popen([*command, :err => [:child, :out]]) do |io|
-      io.lines.each do |line|
+      io.each_line do |line|
        BigBlueButton.logger.info line.chomp
      end
    end
@ -139,7 +139,7 @@ module BigBlueButton
    IO.pipe do |r, w|
      pid = spawn(*command, :out => outio, :err => w)
      w.close
-      r.lines.each do |line|
+      r.each_line do |line|
        BigBlueButton.logger.info line.chomp
      end
      Process.waitpid(pid)
--- a/record-and-playback/core/scripts/utils/gen_webvtt
+++ b/record-and-playback/core/scripts/utils/gen_webvtt
@ -0,0 +1,399 @@
+#!/usr/bin/env python3
+
+# This file is part of BigBlueButton.
+#
+# BigBlueButton is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# BigBlueButton is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with BigBlueButton.  If not, see <http://www.gnu.org/licenses/>.
+
+# To install dependencies on Ubuntu:
+# apt-get install python3 python3-lxml python3-pyicu
+
+from lxml import etree
+from collections import deque
+from fractions import Fraction
+import io
+from icu import Locale, BreakIterator
+import unicodedata
+import html
+import logging
+import json
+import sys
+import os
+import argparse
+
+logging.basicConfig(level=logging.DEBUG)
+
+logger = logging.getLogger(__name__)
+
+def webvtt_timestamp(ms):  # Format a millisecond count as an 'HH:MM:SS.mmm' timestamp string
+    frac_s = int(ms % 1000)  # millisecond remainder
+    s = int(ms / 1000 % 60)
+    m = int(ms / 1000 / 60 % 60)
+    h = int(ms / 1000 / 60 / 60)  # hours are not wrapped; '{:02}' only pads, so the field can be wider than two digits
+    return '{:02}:{:02}:{:02}.{:03}'.format(h, m, s, frac_s)
+
+class CaptionLine:  # One wrapped line of caption text together with the time span it covers
+    def __init__(self):
+        self.text = ""
+        self.start_time = 0  # Caption.split_lines stores the smallest timestamp of the line here
+        self.end_time = 0  # Caption.split_lines stores the largest timestamp of the line here
+
+class Caption:  # Caption text for one locale, kept as parallel per-character lists
+    def __init__(self, locale):
+        self.locale = locale
+        self.text = list()  # one entry per character
+        self.timestamps = list()  # timestamps[k] is the timestamp of text[k]
+        self._del_timestamps = list()  # parallel to text: timestamp remembered from a deletion at that position, else None
+
+    def apply_edit(self, i, j, timestamp, text):  # Replace characters [i:j] with the list `text`, stamping every new character
+        del_timestamp = None
+        if j > i:  # a non-empty range is being removed
+            if self._del_timestamps[i] is not None:
+                del_timestamp = self._del_timestamps[i]
+            else:
+                del_timestamp = self.timestamps[i]
+            self._del_timestamps[i] = del_timestamp
+            logger.debug("Removing text %s at %d:%d, del_ts: %d",
+                    repr(''.join(self.text[i:j])), i, j, del_timestamp)
+
+        if len(text) > 0:
+            logger.debug("Inserting text %s at %d:%d, ts: %d",
+                    repr(''.join(text)), i, j, timestamp)
+
+            if i < len(self.timestamps) and timestamp > self.timestamps[i]:  # new text is newer than the character currently at i
+                timestamp = self._del_timestamps[i]  # prefer the timestamp remembered from a deletion here
+                if timestamp is None and i > 0:
+                    timestamp = self.timestamps[i-1]  # otherwise fall back to the preceding character's timestamp
+                logger.debug("Out of order timestamps, using ts: %d", timestamp)  # NOTE(review): timestamp is still None here when i == 0 and no deletion timestamp is stored
+
+        self._del_timestamps[i:j] = [del_timestamp] * len(text)
+        if (i < len(self._del_timestamps)):
+            self._del_timestamps[i] = del_timestamp
+
+        self.text[i:j] = text
+        self.timestamps[i:j] = [timestamp] * len(text)
+
+    def apply_record_events(self, events):  # Shift timestamps to skip time spent not recording; text from those gaps becomes a single "\n"
+        record = False
+        ts_offset = 0  # total not-recording time accumulated so far
+        stop_ts = 0
+        start_ts = None
+        stop_pos = 0
+        start_pos = None
+        for event in events:
+            if event['name'] == 'record_status':
+                status = event['status']
+                timestamp = event['timestamp']
+
+                if status and not record:  # transition: not recording -> recording
+                    record = True
+                    start_ts = timestamp
+                    logger.debug("Recording started at ts: %d", start_ts)
+
+                    # Find the position of the first character after recording
+                    # started
+                    start_pos = stop_pos
+                    while start_pos < len(self.timestamps) and \
+                            self.timestamps[start_pos] < start_ts:
+                        start_pos += 1
+
+                    logger.debug("Replacing characters %d:%d",
+                            stop_pos, start_pos)
+                    self.text[stop_pos:start_pos] = ["\n"]  # characters stamped inside the gap collapse to one newline
+                    self.timestamps[stop_pos:start_pos] = [stop_ts - ts_offset]
+
+                    start_pos = stop_pos + 1  # continue right after the inserted newline
+                    ts_offset += start_ts - stop_ts  # add the length of the gap that just ended
+                    logger.debug("Timestamp offset now %d", ts_offset)
+
+                    stop_ts = None
+                    stop_pos = None
+
+                if not status and record:  # transition: recording -> not recording
+                    record = False
+                    stop_ts = timestamp
+                    logger.debug("Recording stopped at ts: %d", stop_ts)
+
+                    # Find the position of the first character after recording
+                    # stopped, and apply ts offsets
+                    stop_pos = start_pos
+                    while stop_pos < len(self.timestamps) and \
+                            self.timestamps[stop_pos] < stop_ts:
+                        self.timestamps[stop_pos] -= ts_offset
+                        stop_pos += 1
+
+        if record:
+            logger.debug("No recording stop, applying final ts offsets")
+
+            while start_pos < len(self.timestamps):
+                self.timestamps[start_pos] -= ts_offset
+                start_pos += 1
+
+    @classmethod
+    def from_events(cls, events, apply_record_events=True):  # Build a {locale: Caption} dict from the parsed event list
+        captions = {}
+
+        # Apply all of the caption events to generate the full text
+        # with per-character timestamps
+        for event in events:
+            if event['name'] == 'edit_caption_history':
+                locale = event['locale']
+                i = event['start_index']
+                j = event['end_index']
+                timestamp = event['timestamp']
+                text = event['text']
+
+                caption = captions.get(locale)
+                if caption is None:
+                    logger.info("Started caption stream for locale '%s'",
+                            locale)
+                    captions[locale] = caption = cls(locale)
+
+                caption.apply_edit(i, j, timestamp, text)
+
+        if apply_record_events:
+            for locale, caption in captions.items():
+                logger.info("Applying recording events to locale '%s'", locale)
+                caption.apply_record_events(events)  # every stream is adjusted against the same full event list
+
+        logger.info("Generated %d caption stream(s)", len(captions))
+        return captions
+
+    def split_lines(self, max_length=32):  # Wrap the text into a list of CaptionLine objects, breaking at ICU line-break positions
+        lines = list()
+
+        str_text = "".join(self.text)
+
+        locale = Locale(self.locale)
+        logger.debug("Using locale %s for word-wrapping",
+                locale.getDisplayName(locale))
+
+        break_iter = BreakIterator.createLineInstance(locale)
+        break_iter.setText(str_text)  # NOTE(review): break offsets are used below as indexes into self.text (one entry per character) - confirm the two agree for all input
+        
+        line = CaptionLine()
+        line_start = 0
+        prev_break = 0
+        next_break = break_iter.following(prev_break)
+
+        # Super simple "greedy" line break algorithm.
+        while prev_break < len(self.text):
+            status = break_iter.getRuleStatus()
+
+            line_end = next_break
+            while line_end > line_start and ( \
+                    self.text[line_end-1].isspace() or \
+                    unicodedata.category(self.text[line_end-1]) in ['Cc', 'Mn']
+                    ):
+                line_end -= 1
+
+            do_break = False
+            text_section = unicodedata.normalize(
+                    'NFC', "".join(self.text[line_start:line_end]))
+            timestamps_section = self.timestamps[line_start:next_break]  # runs to next_break, so it also covers the characters trimmed from text_section
+            start_time = min(timestamps_section)
+            end_time = max(timestamps_section)
+            if len(text_section) > max_length:
+                if prev_break == line_start:
+                    # Over-long string. Just chop it into bits
+                    line_end = next_break = prev_break + max_length
+                else:
+                    next_break = prev_break  # emit the line as it stood at the previous break, then retry from there
+                    do_break = True
+
+            else:
+                # Status [100,200) indicates a required (hard) line break
+                if next_break >= len(self.text) or \
+                        (status >= 100 and status < 200):
+                    line.text = text_section
+                    line.start_time = start_time
+                    line.end_time = end_time
+                    do_break = True
+
+            if do_break:
+                logger.debug("text section %d -> %d (%d): %s",
+                        line.start_time, line.end_time,
+                        len(line.text), repr(line.text))
+                lines.append(line)
+                line = CaptionLine()
+                line_start = next_break
+            else:
+                line.text = text_section  # still fits: remember this candidate and try to extend it to the next break
+                line.start_time = start_time
+                line.end_time = end_time
+
+            prev_break = next_break
+            next_break = break_iter.following(prev_break)
+
+        return lines
+
+    def write_webvtt(self, f):  # Write this caption stream to `f` as UTF-8 encoded WebVTT; every write passes bytes
+        # Write magic
+        f.write("WEBVTT\n\n".encode('utf-8'))
+
+        lines = self.split_lines()
+        for i, line in enumerate(lines):
+            # Don't generate a cue for empty lines
+            if len(line.text) == 0:
+                continue
+
+            start_time = line.start_time
+            end_time = line.end_time
+
+            if i + 1 < len(lines):
+                next_start_time = lines[i + 1].start_time
+                # If the next line is close after the current line, make the
+                # timestamps continuous so the subtitle doesn't "blink"
+                if next_start_time - end_time < 1000:
+                    end_time = next_start_time
+
+            # Apply some duration cleanup heuristics to give some reasonable
+            # line durations
+            duration = end_time - start_time
+            # Make lines go away if they've been showing for >16 seconds
+            if duration > 16000:
+                duration = 16000
+            # A minimum per-character time for display (up to 3.2s for 32char)
+            if duration < 100 * len(line.text):
+                duration = 100 * len(line.text)
+            # Never show a caption (even a short one) for less than 1s
+            if duration < 1000:
+                duration = 1000
+
+            end_time = start_time + duration
+
+            f.write("{} --> {}\n".format(
+                    webvtt_timestamp(start_time),
+                    webvtt_timestamp(end_time)
+                    ).encode('utf-8'))
+            f.write(html.escape(line.text, quote=False).encode('utf-8'))  # cue text is HTML-escaped; quote=False leaves quote characters alone
+            f.write("\n\n".encode('utf-8'))
+
+    def caption_desc(self):  # Describe this stream (locale code and display name) for the captions.json index
+        locale = Locale(self.locale)
+        return {
+                "locale": self.locale,
+                "localeName": locale.getDisplayName(locale)
+                }
+
+
+def parse_record_status(event, element):  # Fill the `event` dict in place from a RecordStatusEvent XML element
+    userId = element.find('userId')
+    status = element.find('status')
+
+    event['name'] = 'record_status'
+    event['user_id'] = userId.text
+    event['status'] = (status.text == 'true')  # only the exact text 'true' counts as recording on
+
+def parse_caption_edit(event, element):  # Fill the `event` dict in place from an EditCaptionHistoryEvent XML element
+    locale = element.find('locale')
+    text = element.find('text')
+    startIndex = element.find('startIndex')
+    endIndex = element.find('endIndex')
+    localeCode = element.find('localeCode')
+
+    event['name'] = 'edit_caption_history'
+    event['locale_name'] = locale.text
+    if localeCode is not None:
+        event['locale'] = localeCode.text
+    else:
+        # Fallback for missing 'localeCode'
+        event['locale'] = "en"
+    if text.text is None:  # an element with no text content means a pure deletion
+        event['text'] = list()
+    else:
+        event['text'] = list(text.text)  # split into a list with one entry per character
+    event['start_index'] = int(startIndex.text)
+    event['end_index'] = int(endIndex.text)
+
+
+def parse_events(directory="."):  # Read <directory>/events.xml and return a deque of record-status and caption-edit event dicts
+    start_time = None
+    have_record_events = False
+    events = deque()
+
+    with open("{}/events.xml".format(directory), "rb") as f:
+        for _, element in etree.iterparse(f, tag="event"):
+            try:
+                event = {}
+
+                # Make timestamps relative to the first event in the file.
+                # No unit conversion happens here; write_webvtt later treats
+                # these values as milliseconds.
+                timestamp = int(element.attrib['timestamp'])
+                if not start_time:  # taken from the first event seen, before the module filter below
+                    start_time = timestamp
+                timestamp = timestamp - start_time
+
+                # Only need events from these modules
+                if not element.attrib['module'] in ['CAPTION','PARTICIPANT']:
+                    continue
+
+                event['name'] = name = element.attrib['eventname']  # replaced with a normalized name by the parse_* helpers
+                event['timestamp'] = timestamp
+
+                if name == 'RecordStatusEvent':
+                    parse_record_status(event, element)
+                    have_record_events = True
+                elif name == 'EditCaptionHistoryEvent':
+                    parse_caption_edit(event, element)
+                else:
+                    logger.debug("Unhandled event: %s", name)
+                    continue
+
+                events.append(event)
+            finally:
+                element.clear()  # runs on the `continue` paths too, so every parsed element is cleared
+
+    if not have_record_events:
+        # Add a fake record start event to the events list
+        event = {
+                'name': 'record_status',
+                'user_id': None,
+                'timestamp': 0,
+                'status': True
+                }
+        events.appendleft(event)
+
+    return events
+
+if __name__ == "__main__":  # Command-line entry point: events.xml in, caption_<locale>.vtt files plus captions.json out
+    parser = argparse.ArgumentParser(
+            description="Generate WebVTT files from BigBlueButton captions",
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("-i", "--input", metavar="PATH",
+            help="input directory with events.xml file",
+            default=os.curdir)
+    parser.add_argument("-o", "--output", metavar="PATH",
+            help="output directory",
+            default=os.curdir)
+    args = parser.parse_args()
+
+    rawdir = args.input
+    outputdir = args.output
+
+    logger.info("Reading recording events file")
+    events = parse_events(rawdir)
+
+    logger.info("Generating caption data from recording events")
+    captions = Caption.from_events(events)
+    for locale, caption in captions.items():
+        filename = os.path.join(outputdir, "caption_{}.vtt".format(locale))  # one WebVTT file per locale
+        logger.info("Writing captions for locale %s to %s", locale, filename)
+        with open(filename, "wb") as f:  # binary mode: write_webvtt writes encoded bytes
+            caption.write_webvtt(f)
+
+    filename = os.path.join(outputdir, "captions.json")
+    logger.info("Writing captions index file to %s", filename)
+
+    caption_descs = [ caption.caption_desc() for caption in captions.values() ]
+    with open(filename, "w") as f:  # written even when there are no caption streams (as an empty JSON list)
+        json.dump(caption_descs, f)
--- a/record-and-playback/presentation/playback/presentation/0.9.0/acornmediaplayer/jquery.acornmediaplayer.js
+++ b/record-and-playback/presentation/playback/presentation/0.9.0/acornmediaplayer/jquery.acornmediaplayer.js
@ -271,11 +271,6 @@
 						acorn.$seek.slider('value', currenttime);
 					}
 				}
-				
-				// If captions are active, update them
-				if(captionsActive) { 
-					updateCaption(); 
-				}
 			};
 			
 			/*
@ -692,33 +687,14 @@
 			var captionRadioName = 'acornCaptions' + uniqueID();
 			 
 			var captionOff = function() {
-				captions = '';
-				acorn.$caption.hide();
-				activeCaptions = false;
-
-				acorn.$transcriptBtn.removeClass(transcriptBtnActiveClass).hide();
-				acorn.$transcript.hide();
+				for (var i = 0; i < acorn.$track.length; i++) {
+					var track = acorn.$track[i];
+					track.track.mode = "disabled";
+				}
 				
 				acorn.$captionBtn.removeClass(captionBtnActiveClass);
 			};
 			
-			/*
-			 * Update caption based on "currentTime"
-			 * Borrowed and adapted from Bruce Lawson's “Accessible HTML5 Video with JavaScripted captions”
-			 * http://dev.opera.com/articles/view/accessible-html5-video-with-javascripted-captions/
-			 */
-			var updateCaption = function() {			
-				var now = acorn.$self[0].currentTime; // how soon is now?
-				var text = "";
-				for (var i = 0; i < captions.length; i++) {
-					if (now >= captions[i].start && now <= captions[i].end) {
-						text = captions[i].content; // yes? then load it into a variable called text
-						break;
-					}
-				}
-				acorn.$caption.html(text); // and put contents of text into caption div
-			};
-			
 			/*
 			 * Initialize the Caption Selector
 			 * Used when multiple <track>s are present
@ -792,54 +768,33 @@
 			 * Takes the url as a parameter
 			 */
 			var loadCaption = function(url) {
-				// add a loading class to the Caption Button when starting to load the caption
-				acorn.$captionBtn.addClass(captionBtnLoadingClass);
-				// make an AJAX request to load the file
-				$.ajax({
-					url: url,
-					success: function(data) {
-						/*
-						 * On success use a SRT parser on the loaded data
-						 * Using JavaScript SRT parser by Silvia Pfeiffer <silvia@siliva-pfeiffer.de>
-						 * parseSrt included at the end of this file
-						 */
-						captions = parseSrt(data);
+				// Iterate through the available captions, and disable all but the selected one
+				for (var i = 0; i < acorn.$track.length; i++) {
+					var track = acorn.$track[i];
+					if (track.getAttribute('src') == url) {
+						track.track.mode = "showing";

+						// TODO transcript markup?
 						// show the Transcript Button						
-						acorn.$transcriptBtn.show();
+						//acorn.$transcriptBtn.show();
 						
 						/* 
 						 * Generate the markup for the transcript
 						 * Markup based on Bruce Lawson's “Accessible HTML5 Video with JavaScripted captions”
 						 * http://dev.opera.com/articles/view/accessible-html5-video-with-javascripted-captions/
 						 */
-						var transcriptText = '';
-						$(captions).each(function() {
-							transcriptText += '<span data-begin="' + parseInt(this.start, 10) + '" data-end=' + parseInt(this.end, 10) + '>' + this.content.replace("'","") + '</span>';
-						});
+						//var transcriptText = '';
+						//$(captions).each(function() {
+						//	transcriptText += '<span data-begin="' + parseInt(this.start, 10) + '" data-end=' + parseInt(this.end, 10) + '>' + this.content.replace("'","") + '</span>';
+						//});
 						// append the generated markup
-						acorn.$transcript.html(transcriptText);
-						
-						// show caption
-						acorn.$caption.show();
+						//acorn.$transcript.html(transcriptText);
+					} else {
+						track.track.mode = "disabled";
+					}
+				}
 				captionsActive = true;
-						
-						// in case the media is paused and timeUpdate is not triggered, trigger it
-						if(acorn.$self.prop('paused')) {
-							updateCaption();
-						}
-						
-						acorn.$captionBtn.addClass(captionBtnActiveClass).removeClass(captionBtnLoadingClass);
-					},
-					error: function() {
-						// if an error occurs while loading the caption, turn captions off
-						captionOff();
-						// if a console is available, log error
-						if(console) {
-							console.log('Error loading captions');
-						}
-					}
-				});
+				acorn.$captionBtn.addClass(captionBtnActiveClass);
 			};
 			
 			/*			 
@ -858,6 +813,11 @@
 			 * Caption loading and initialization
 			 */
 			var initCaption = function() {
+				// Check if we have browser support for captions
+				if (typeof(TextTrack) === "undefined") {
+					return;
+				}
+
 				// get all <track> elements
 				acorn.$track = $('track', acorn.$self);
 				
@ -919,7 +879,6 @@
 						} else {
 							loadCaption(tracksrc);
 						}
-						$(this).toggleClass(captionBtnActiveClass);
 					});

 					// load default caption if captionsOn is true
@ -1002,66 +961,3 @@
 	};

 })(jQuery);
-
-/* 
- * parseSrt function
- * JavaScript SRT parser by Silvia Pfeiffer <silvia@siliva-pfeiffer.de>
- * http://silvia-pfeiffer.de/ 
- * 
- * Tri-licensed under MPL 1.1/GPL 2.0/LGPL 2.1
- *  http://www.gnu.org/licenses/gpl.html  
- *  http://www.gnu.org/licenses/lgpl.html
- *  http://www.mozilla.org/MPL/
- *
- * The Initial Developer of the Original Code is Mozilla Corporation.
- * Portions created by the Initial Developer are Copyright (C) 2009
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- *  Silvia Pfeiffer <silvia@siliva-pfeiffer.de>
- *
- *
- */
-function parseSrt(data) {
-    var srt = data.replace(/\r+/g, ''); // remove dos newlines
-    srt = srt.replace(/^\s+|\s+$/g, ''); // trim white space start and end
-    srt = srt.replace(/<[a-zA-Z\/][^>]*>/g, ''); // remove all html tags for security reasons
-
-    // get captions
-    var captions = [];
-    var caplist = srt.split('\n\n');
-    for (var i = 0; i < caplist.length; i=i+1) {
-        var caption = "";
-        var content, start, end, s;
-        caption = caplist[i];
-        s = caption.split(/\n/);
-        if (s[0].match(/^\d+$/) && s[1].match(/\d+:\d+:\d+/)) {
-            // ignore caption number in s[0]
-            // parse time string
-            var m = s[1].match(/(\d+):(\d+):(\d+)(?:,(\d+))?\s*--?>\s*(\d+):(\d+):(\d+)(?:,(\d+))?/);
-            if (m) {
-                start =
-                  (parseInt(m[1], 10) * 60 * 60) +
-                  (parseInt(m[2], 10) * 60) +
-                  (parseInt(m[3], 10)) +
-                  (parseInt(m[4], 10) / 1000);
-                end =
-                  (parseInt(m[5], 10) * 60 * 60) +
-                  (parseInt(m[6], 10) * 60) +
-                  (parseInt(m[7], 10)) +
-                  (parseInt(m[8], 10) / 1000);
-            } else {
-                // Unrecognized timestring
-                continue;
-            }
-            // concatenate text lines to html text
-            content = s.slice(2).join("<br>");
-        } else {
-            // file format error or comment lines
-            continue;
-        }
-        captions.push({start: start, end: end, content: content});
-    }
-
-    return captions;
-}
--- a/record-and-playback/presentation/playback/presentation/0.9.0/playback.js
+++ b/record-and-playback/presentation/playback/presentation/0.9.0/playback.js
@ -310,6 +310,25 @@ load_video = function(){
   webmsource.setAttribute('type','video/webm; codecs="vp8.0, vorbis"');
   video.appendChild(webmsource);

+   // Try to load the captions
+   // TODO this all should be done asynchronously...
+   var capReq = new XMLHttpRequest();
+   capReq.open('GET', RECORDINGS + '/captions.json', /*async=*/false);
+   capReq.send();
+   if (capReq.status == 200) {
+	   console.log("==Loading closed captions");
+	   // With sync request, responseType should always be blank (=="text")
+	   var captions = JSON.parse(capReq.responseText);
+	   for (var i = 0; i < captions.length; i++) {
+		   var track = document.createElement("track");
+		   track.setAttribute('kind', 'captions');
+		   track.setAttribute('label', captions[i]['localeName']);
+		   track.setAttribute('srclang', captions[i]['locale']);
+		   track.setAttribute('src', RECORDINGS + '/caption_' + captions[i]['locale'] + '.vtt');
+		   video.appendChild(track);
+	   }
+   }
+
   /*var time_manager = Popcorn("#video");
   var pc_webcam = Popcorn("#webcam");
   time_manager.on( "timeupdate", function() {
--- a/record-and-playback/presentation/scripts/process/presentation.rb
+++ b/record-and-playback/presentation/scripts/process/presentation.rb
@ -182,6 +182,12 @@ if not FileTest.directory?(target_dir)
      BigBlueButton.process_multiple_videos(target_dir, temp_dir, meeting_id, width, height, presentation_props['audio_offset'], presentation_props['include_deskshare'])
    end

+    BigBlueButton.logger.info("Generating closed captions")
+    ret = BigBlueButton.exec_ret('utils/gen_webvtt', '-i', raw_archive_dir, '-o', target_dir)
+    if ret != 0
+      raise "Generating closed caption files failed"
+    end
+
    process_done = File.new("#{recording_dir}/status/processed/#{meeting_id}-presentation.done", "w")
    process_done.write("Processed #{meeting_id}")
    process_done.close
--- a/record-and-playback/presentation/scripts/publish/presentation.rb
+++ b/record-and-playback/presentation/scripts/publish/presentation.rb
@ -960,6 +960,15 @@ begin
          BigBlueButton.logger.info("Copied audio.ogg file")
        end

+        if File.exist?("#{$process_dir}/captions.json")
+          BigBlueButton.logger.info("Copying caption files")
+          FileUtils.cp("#{$process_dir}/captions.json", package_dir)
+          Dir.glob("#{$process_dir}/caption_*.vtt").each do |caption|
+            BigBlueButton.logger.debug(caption)
+            FileUtils.cp(caption, package_dir)
+          end
+        end
+

        processing_time = File.read("#{$process_dir}/processing_time")