Faster PDF page counting by replacing the "pdf2swf" command with "pdfinfo".

This commit is contained in:
Ghazi Triki 2017-04-20 17:58:44 +01:00
parent 1255e41758
commit 8142031afe
9 changed files with 120 additions and 123 deletions

View File

@ -46,9 +46,7 @@ with BigBlueButton; if not, see <http://www.gnu.org/licenses/>.
<property name="swfToolsDir" value="${swfToolsDir}"/>
</bean>
<bean id="pageCounter" class="org.bigbluebutton.presentation.imp.Pdf2SwfPageCounter">
<property name="swfToolsDir" value="${swfToolsDir}"/>
</bean>
<bean id="pageCounter" class="org.bigbluebutton.presentation.imp.PdfPageCounter"/>
<bean id="pageCounterService" class="org.bigbluebutton.presentation.imp.PageCounterService">
<property name="pageCounter" ref="pageCounter"/>

View File

@ -28,11 +28,11 @@ import org.slf4j.LoggerFactory;
import com.zaxxer.nuprocess.NuAbstractProcessHandler;
import com.zaxxer.nuprocess.NuProcess;
public abstract class AbstractPageConverterHandler extends
public abstract class AbstractCommandHandler extends
NuAbstractProcessHandler {
private static Logger log = LoggerFactory
.getLogger(AbstractPageConverterHandler.class);
.getLogger(AbstractCommandHandler.class);
protected NuProcess nuProcess;
protected int exitCode;
@ -86,7 +86,7 @@ public abstract class AbstractPageConverterHandler extends
return stderrBuilder.indexOf(value) > -1;
}
public Boolean isConversionSuccessful() {
public Boolean isCommandSuccessful() {
return !exitedWithError();
}
}

View File

@ -22,7 +22,7 @@ package org.bigbluebutton.presentation.handlers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class Pdf2PngPageConverterHandler extends AbstractPageConverterHandler {
public class Pdf2PngPageConverterHandler extends AbstractCommandHandler {
private static Logger log = LoggerFactory
.getLogger(Pdf2PngPageConverterHandler.class);

View File

@ -26,11 +26,13 @@ import org.slf4j.LoggerFactory;
/**
*
* The default command output to analyse looks like the following: <br> 20
* DEBUG Using<br> 60 VERBOSE Updating font<br> 80 VERBOSE Drawing
* The default command output to analyse looks like the following: <br>
* 20 DEBUG Using<br>
* 60 VERBOSE Updating font<br>
* 80 VERBOSE Drawing
*
*/
public class Pdf2SwfPageConverterHandler extends AbstractPageConverterHandler {
public class Pdf2SwfPageConverterHandler extends AbstractCommandHandler {
private static Logger log = LoggerFactory
.getLogger(Pdf2SwfPageConverterHandler.class);
@ -42,11 +44,6 @@ public class Pdf2SwfPageConverterHandler extends AbstractPageConverterHandler {
private static String TEXT_TAG_PATTERN = "\\d+\\s" + TEXT_TAG_OUTPUT;
private static String IMAGE_TAG_PATTERN = "\\d+\\s" + IMAGE_TAG_OUTPUT;
@Override
public Boolean isConversionSuccessful() {
return !exitedWithError();
}
/**
*
* @return The number of PlaceObject2 tags in the generated SWF

View File

@ -0,0 +1,47 @@
/**
* BigBlueButton open source conferencing system - http://www.bigbluebutton.org/
*
* Copyright (c) 2017 BigBlueButton Inc. and by respective authors (see below).
*
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free Software
* Foundation; either version 3.0 of the License, or (at your option) any later
* version.
*
* BigBlueButton is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with BigBlueButton; if not, see <http://www.gnu.org/licenses/>.
*
*/
package org.bigbluebutton.presentation.handlers;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Command handler that extracts the page count from the text accumulated in
 * {@code stdoutBuilder} while a PDF inspection command (pdfinfo) runs.
 */
public class PdfPageCounterHandler extends AbstractCommandHandler {

    private static final Logger log = LoggerFactory
            .getLogger(PdfPageCounterHandler.class);

    /**
     * Matches the {@code Pages: <n>} line. The pattern is anchored to the start
     * of a line so that "Pages: 12" appearing inside another field (for
     * example a document title printed earlier in the output) is not mistaken
     * for the real page count, and it requires at least one digit so an empty
     * value never reaches the integer parser.
     */
    private static final Pattern PAGE_NUMBER_PATTERN = Pattern
            .compile("^Pages:\\s*(\\d+)", Pattern.MULTILINE);

    /**
     * @return The number of pages inside the scanned PDF document, or 0 when
     *         the output holds no usable {@code Pages:} line
     */
    public int numberOfPages() {
        Matcher m = PAGE_NUMBER_PATTERN.matcher(stdoutBuilder.toString());
        if (!m.find()) {
            // Nothing to parse: the command failed or has not produced output.
            return 0;
        }
        try {
            return Integer.parseInt(m.group(1));
        } catch (NumberFormatException e) {
            // Only reachable when the digit run does not fit in an int.
            log.warn("Unable to parse page count '{}'", m.group(1));
            return 0;
        }
    }
}

View File

@ -22,7 +22,7 @@ package org.bigbluebutton.presentation.handlers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class Png2SwfPageConverterHandler extends AbstractPageConverterHandler {
public class Png2SwfPageConverterHandler extends AbstractCommandHandler {
private static Logger log = LoggerFactory
.getLogger(Png2SwfPageConverterHandler.class);

View File

@ -93,7 +93,7 @@ public class Pdf2SwfPageConverter implements PageConverter {
+ defineTextThreshold + imageTagThreshold) * 2;
File destFile = new File(dest);
if (pHandler.isConversionSuccessful() && destFile.exists()
if (pHandler.isCommandSuccessful() && destFile.exists()
&& pHandler.numberOfPlacements() < placementsThreshold
&& pHandler.numberOfTextTags() < defineTextThreshold
&& pHandler.numberOfImageTags() < imageTagThreshold) {
@ -110,7 +110,7 @@ public class Pdf2SwfPageConverter implements PageConverter {
logData.put("presId", pres.getId());
logData.put("filename", pres.getName());
logData.put("page", page);
logData.put("convertSuccess", pHandler.isConversionSuccessful());
logData.put("convertSuccess", pHandler.isCommandSuccessful());
logData.put("fileExists", destFile.exists());
logData.put("numObjectTags", pHandler.numberOfPlacements());
logData.put("numTextTags", pHandler.numberOfTextTags());
@ -178,7 +178,7 @@ public class Pdf2SwfPageConverter implements PageConverter {
// conversion
tempPng.delete();
boolean doneSwf = pSwfHandler.isConversionSuccessful();
boolean doneSwf = pSwfHandler.isCommandSuccessful();
long convertEnd = System.currentTimeMillis();

View File

@ -1,104 +0,0 @@
/**
* BigBlueButton open source conferencing system - http://www.bigbluebutton.org/
*
* Copyright (c) 2012 BigBlueButton Inc. and by respective authors (see below).
*
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free Software
* Foundation; either version 3.0 of the License, or (at your option) any later
* version.
*
* BigBlueButton is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with BigBlueButton; if not, see <http://www.gnu.org/licenses/>.
*
*/
package org.bigbluebutton.presentation.imp;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.Timer;
import java.util.TimerTask;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bigbluebutton.presentation.PageCounter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link PageCounter} that runs {@code pdf2swf -I <file>} from the configured
 * SWFTools directory and reads the page count from its standard output.
 */
public class Pdf2SwfPageCounter implements PageCounter {
    private static Logger log = LoggerFactory.getLogger(Pdf2SwfPageCounter.class);

    private static final Pattern PAGE_NUMBER_PATTERN = Pattern.compile("page=([0-9]+)(?: .+)");

    private String SWFTOOLS_DIR;

    /**
     * Counts the pages of the given presentation file.
     *
     * @param presentationFile the file handed to pdf2swf
     * @return the page number found on the last matching output line, or 0 when
     *         no line matched or the command could not be run to completion
     */
    public int countNumberOfPages(File presentationFile) {
        int numPages = 0; // total number of pages of this pdf

        // Arguments are passed as an array so that a tools directory or file
        // path containing whitespace is not split into several arguments, as
        // Runtime.exec(String) would do.
        String[] commandArgs = {
            SWFTOOLS_DIR + File.separator + "pdf2swf",
            "-I",
            presentationFile.getAbsolutePath()
        };
        // Single-string form, kept only for log messages.
        String COMMAND = commandArgs[0] + " " + commandArgs[1] + " " + commandArgs[2];

        // Created before the try block so the finally clause can always cancel it.
        Timer timer = new Timer(true);
        Process p = null;
        try {
            InterruptTimerTask interrupter = new InterruptTimerTask(Thread.currentThread());
            timer.schedule(interrupter, 60000);

            p = Runtime.getRuntime().exec(commandArgs);
            BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
            BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
            String info;
            Matcher matcher;
            while ((info = stdInput.readLine()) != null) {
                // The output would be something like this 'page=21 width=718.00 height=538.00'.
                // We need to extract the page number (i.e. 21) from it.
                matcher = PAGE_NUMBER_PATTERN.matcher(info);
                if (matcher.matches()) {
                    numPages = Integer.valueOf(matcher.group(1).trim()).intValue();
                }
            }
            while ((info = stdError.readLine()) != null) {
                log.error(info);
            }
            stdInput.close();
            stdError.close();
            p.waitFor();
        } catch (InterruptedException e) {
            // Raised by waitFor() when the timer task interrupts this thread.
            log.info("TIMEDOUT excuting : " + COMMAND);
            p.destroy();
        } catch (Exception e) {
            // Launch or read failure: report the real cause instead of a timeout.
            log.error("Failed to execute : " + COMMAND, e);
            // p is still null when exec() itself failed.
            if (p != null) {
                p.destroy();
            }
        } finally {
            timer.cancel();       // If the process returns within the timeout period, we have to stop the
                                  // interrupter so that it does not unexpectedly interrupt some other code later.
            Thread.interrupted(); // We need to clear the interrupt flag on the current thread just in case
                                  // interrupter executed after waitFor had already returned but before
                                  // timer.cancel took effect.
                                  //
                                  // Oh, and there's also Sun bug 6420270 to worry about here.
        }
        return numPages;
    }

    /**
     * @param dir directory that contains the pdf2swf executable
     */
    public void setSwfToolsDir(String dir) {
        SWFTOOLS_DIR = dir;
    }

    /** Timer task that interrupts the thread it was created with. */
    class InterruptTimerTask extends TimerTask {
        private final Thread thread;

        public InterruptTimerTask(Thread t) {
            this.thread = t;
        }

        public void run() {
            thread.interrupt();
        }
    }
}

View File

@ -0,0 +1,59 @@
/**
* BigBlueButton open source conferencing system - http://www.bigbluebutton.org/
*
* Copyright (c) 2012 BigBlueButton Inc. and by respective authors (see below).
*
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free Software
* Foundation; either version 3.0 of the License, or (at your option) any later
* version.
*
* BigBlueButton is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with BigBlueButton; if not, see <http://www.gnu.org/licenses/>.
*
*/
package org.bigbluebutton.presentation.imp;
import java.io.File;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
import org.bigbluebutton.presentation.PageCounter;
import org.bigbluebutton.presentation.handlers.PdfPageCounterHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zaxxer.nuprocess.NuProcess;
import com.zaxxer.nuprocess.NuProcessBuilder;
/**
 * {@link PageCounter} that runs the {@code pdfinfo} command on a file and
 * obtains the page count from a {@link PdfPageCounterHandler} attached to the
 * process.
 */
public class PdfPageCounter implements PageCounter {
    private static Logger log = LoggerFactory.getLogger(PdfPageCounter.class);

    /** Maximum time, in seconds, to wait for the command to exit. */
    private static final int WAIT_FOR_SEC = 5;

    /**
     * Counts the pages of the given presentation file.
     *
     * @param presentationFile the file handed to pdfinfo
     * @return the page count reported by the handler, or 0 when the command
     *         could not be started
     */
    public int countNumberOfPages(File presentationFile) {
        String path = presentationFile.getAbsolutePath();
        NuProcessBuilder pdfInfo = new NuProcessBuilder(
                Arrays.asList("pdfinfo", path));

        PdfPageCounterHandler pHandler = new PdfPageCounterHandler();
        pdfInfo.setProcessListener(pHandler);

        NuProcess process = pdfInfo.start();
        if (process == null) {
            // NOTE(review): NuProcess is understood to return null on an
            // immediate launch failure (e.g. pdfinfo not installed) - confirm
            // against the library version in use.
            log.error("Unable to start pdfinfo for {}", path);
            return 0;
        }

        try {
            process.waitFor(WAIT_FOR_SEC, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            log.error("Interrupted while counting pages of {}", path, e);
            // Restore the flag so callers further up can react to the interrupt.
            Thread.currentThread().interrupt();
        } finally {
            // Do not leave a child process behind after a timeout or interrupt.
            if (process.isRunning()) {
                log.warn("pdfinfo still running after {} seconds for {}, destroying it",
                        WAIT_FOR_SEC, path);
                process.destroy(true);
            }
        }

        return pHandler.numberOfPages();
    }
}