From 0faee47ad8d3a7341d2b9552fb217704391d8500 Mon Sep 17 00:00:00 2001 From: vladisav Date: Thu, 10 Sep 2015 00:26:26 +0200 Subject: [PATCH 1/3] Adds FeatureExtractor and corresponding test class; --- .../edu/cmu/sphinx/api/FeatureExtractor.java | 146 ++++++++++++++++++ .../cmu/sphinx/api/FeatureExtractorTest.java | 109 +++++++++++++ .../cmu/sphinx/api/10001-90210-01803.features | 1 + 3 files changed, 256 insertions(+) create mode 100644 sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java create mode 100644 sphinx4-core/src/test/java/edu/cmu/sphinx/api/FeatureExtractorTest.java create mode 100644 sphinx4-core/src/test/resources/edu/cmu/sphinx/api/10001-90210-01803.features diff --git a/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java b/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java new file mode 100644 index 000000000..5539d6a69 --- /dev/null +++ b/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java @@ -0,0 +1,146 @@ +package edu.cmu.sphinx.api; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +import edu.cmu.sphinx.frontend.Data; +import edu.cmu.sphinx.frontend.DataEndSignal; +import edu.cmu.sphinx.frontend.DataProcessingException; +import edu.cmu.sphinx.frontend.DoubleData; +import edu.cmu.sphinx.frontend.FloatData; +import edu.cmu.sphinx.frontend.FrontEnd; +import edu.cmu.sphinx.frontend.util.StreamDataSource; +import edu.cmu.sphinx.util.props.ConfigurationManager; + +/** + * Extracts features from input stream + * + * @author Vladisav Jelisavcic + * + */ +class FeatureExtractor { + private FrontEnd frontEnd; + private StreamDataSource audioSource; + private int featureLength = -1; + + /** The logger for this class */ + private static final Logger logger = Logger + .getLogger("edu.cmu.sphinx.api.FeatureExtractor"); + + /** + * Constructs a FeatureExtractor. + * + * @param cm + * the configuration manager + * @param frontEndName + * the name for the frontEnd to be used + * @param inputStream + * data stream + * + * @throws IOException if error occurred + */ + public FeatureExtractor(ConfigurationManager cm, String frontEndName, InputStream inputStream) + throws IOException { + + if(cm.lookup(frontEndName) == null) { + throw new RuntimeException("No such frontend: " + frontEndName); + } + + frontEnd = (FrontEnd) cm.lookup(frontEndName); + audioSource = (StreamDataSource) cm.lookup("streamDataSource"); + audioSource.setInputStream(inputStream); + } + + /** + * Extracts all features from the supplied InputStream. + * + * @return float[][] when called first time, null otherwise + * + * @throws DataProcessinException if error occurred + */ + public float[][] getAllFeatures() throws DataProcessingException { + List featureList = new ArrayList(); + + Data feature = frontEnd.getData(); + if(feature == null) + return null; + + while (!(feature instanceof DataEndSignal)) { + if (feature instanceof DoubleData) { + double[] featureData = ((DoubleData) feature).getValues(); + if (featureLength < 0) { + featureLength = featureData.length; + logger.info("Feature length: " + featureLength); + } + float[] convertedData = new float[featureData.length]; + for (int i = 0; i < featureData.length; i++) { + convertedData[i] = (float) featureData[i]; + } + featureList.add(convertedData); + } else if (feature instanceof FloatData) { + float[] featureData = ((FloatData) feature).getValues(); + if (featureLength < 0) { + featureLength = featureData.length; + logger.info("Feature length: " + featureLength); + } + featureList.add(featureData); + } + feature = frontEnd.getData(); + } + + float[][] allFeatures = new float[featureList.size()][]; + for(int i=0;i Date: Mon, 14 Sep 2015 09:50:55 +0200 Subject: [PATCH 2/3] Fixes minor issues with the FeatureExtractor; --- .../edu/cmu/sphinx/api/FeatureExtractor.java | 240 +++++++++--------- .../cmu/sphinx/api/FeatureExtractorTest.java | 98 ++++--- .../cmu/sphinx/api/10001-90210-01803.features | 2 +- 3 files changed, 162 insertions(+), 178 deletions(-) diff --git a/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java b/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java index 5539d6a69..19b5ccca1 100644 --- a/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java +++ b/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java @@ -4,7 +4,6 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.List; -import java.util.logging.Logger; import edu.cmu.sphinx.frontend.Data; import edu.cmu.sphinx.frontend.DataEndSignal; @@ -12,8 +11,6 @@ import edu.cmu.sphinx.frontend.DoubleData; import edu.cmu.sphinx.frontend.FloatData; import edu.cmu.sphinx.frontend.FrontEnd; -import edu.cmu.sphinx.frontend.util.StreamDataSource; -import edu.cmu.sphinx.util.props.ConfigurationManager; /** * Extracts features from input stream @@ -22,125 +19,120 @@ * */ class FeatureExtractor { - private FrontEnd frontEnd; - private StreamDataSource audioSource; - private int featureLength = -1; - - /** The logger for this class */ - private static final Logger logger = Logger - .getLogger("edu.cmu.sphinx.api.FeatureExtractor"); - - /** - * Constructs a FeatureExtractor. - * - * @param cm - * the configuration manager - * @param frontEndName - * the name for the frontEnd to be used - * @param inputStream - * data stream - * - * @throws IOException if error occurred - */ - public FeatureExtractor(ConfigurationManager cm, String frontEndName, InputStream inputStream) - throws IOException { - - if(cm.lookup(frontEndName) == null) { - throw new RuntimeException("No such frontend: " + frontEndName); - } - - frontEnd = (FrontEnd) cm.lookup(frontEndName); - audioSource = (StreamDataSource) cm.lookup("streamDataSource"); - audioSource.setInputStream(inputStream); - } - - /** - * Extracts all features from the supplied InputStream. - * - * @return float[][] when called first time, null otherwise - * - * @throws DataProcessinException if error occurred - */ - public float[][] getAllFeatures() throws DataProcessingException { + + private FrontEnd frontEnd; + private int featureLength = -1; + + /** + * Constructs a FeatureExtractor. + * + * @param cm + * the configuration manager + * @param frontEndName + * the name for the frontEnd to be used + * @param inputStream + * data stream + * + * @throws IOException + * if error occurred + */ + public FeatureExtractor(InputStream inputStream, int sampleRate) + throws IOException { + Configuration configuration = new Configuration(); + + configuration + .setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us"); + configuration + .setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict"); + configuration + .setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin"); + + Context ctx = new Context(configuration); + ctx.setSampleRate(sampleRate); + ctx.setSpeechSource(inputStream); + + frontEnd = (FrontEnd) ctx.getInstance(FrontEnd.class); + } + + /** + * Extracts all features from the supplied InputStream. + * + * @return float[][] when called first time, null otherwise + * + * @throws DataProcessinException + * if error occurred + */ + public List getAllFeatures() throws DataProcessingException { List featureList = new ArrayList(); - - Data feature = frontEnd.getData(); - if(feature == null) - return null; - - while (!(feature instanceof DataEndSignal)) { - if (feature instanceof DoubleData) { - double[] featureData = ((DoubleData) feature).getValues(); - if (featureLength < 0) { - featureLength = featureData.length; - logger.info("Feature length: " + featureLength); - } - float[] convertedData = new float[featureData.length]; - for (int i = 0; i < featureData.length; i++) { - convertedData[i] = (float) featureData[i]; - } - featureList.add(convertedData); - } else if (feature instanceof FloatData) { - float[] featureData = ((FloatData) feature).getValues(); - if (featureLength < 0) { - featureLength = featureData.length; - logger.info("Feature length: " + featureLength); - } - featureList.add(featureData); - } - feature = frontEnd.getData(); - } - - float[][] allFeatures = new float[featureList.size()][]; - for(int i=0;i data = fe.getAllFeatures(); + + int numSamples = data.size(); + int numFeatures = data.get(0).length; + int numDataPoints = numSamples * numFeatures; - - assertEquals(features.length,numDataPoints); // check if all data points are loaded - for(int i=0;i Date: Mon, 14 Sep 2015 10:09:55 +0200 Subject: [PATCH 3/3] Fixes formatting issues in FeatureExtractor; --- .../main/java/edu/cmu/sphinx/api/FeatureExtractor.java | 10 ++++------ .../java/edu/cmu/sphinx/api/FeatureExtractorTest.java | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java b/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java index 19b5ccca1..9207d5fe3 100644 --- a/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java +++ b/sphinx4-core/src/main/java/edu/cmu/sphinx/api/FeatureExtractor.java @@ -40,12 +40,9 @@ public FeatureExtractor(InputStream inputStream, int sampleRate) throws IOException { Configuration configuration = new Configuration(); - configuration - .setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us"); - configuration - .setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict"); - configuration - .setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin"); + configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us"); + configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict"); + configuration.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin"); Context ctx = new Context(configuration); ctx.setSampleRate(sampleRate); @@ -136,3 +133,4 @@ public float[] getNextFeatureFrame() throws DataProcessingException { } } + diff --git a/sphinx4-core/src/test/java/edu/cmu/sphinx/api/FeatureExtractorTest.java b/sphinx4-core/src/test/java/edu/cmu/sphinx/api/FeatureExtractorTest.java index 9cba5576e..a4adee1ca 100644 --- a/sphinx4-core/src/test/java/edu/cmu/sphinx/api/FeatureExtractorTest.java +++ b/sphinx4-core/src/test/java/edu/cmu/sphinx/api/FeatureExtractorTest.java @@ -30,7 +30,6 @@ public void setUp() { .getResourceAsStream("/edu/cmu/sphinx/api/10001-90210-01803.features"); Scanner sc = new Scanner(asciiStream); int numDataPoints = sc.nextInt(); - System.out.println(numDataPoints); features = new float[numDataPoints]; int i = 0; @@ -99,3 +98,4 @@ public void testGetNextFeatureFrame() throws IOException { } } +