001 package org.maltparser;
002
003 import java.net.MalformedURLException;
004 import java.net.URL;
005 import java.util.Iterator;
006
007 import org.maltparser.core.exception.MaltChainedException;
008 import org.maltparser.core.flow.FlowChartInstance;
009 import org.maltparser.core.helper.SystemInfo;
010 import org.maltparser.core.helper.Util;
011 import org.maltparser.core.io.dataformat.ColumnDescription;
012 import org.maltparser.core.io.dataformat.DataFormatInstance;
013 import org.maltparser.core.options.OptionManager;
014 import org.maltparser.core.syntaxgraph.DependencyGraph;
015 import org.maltparser.core.syntaxgraph.DependencyStructure;
016 import org.maltparser.core.syntaxgraph.node.DependencyNode;
017 import org.maltparser.parser.SingleMalt;
018
019 /**
020 * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser.
021 *
022 * There are two ways to call the MaltParserService:
023 * 1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser.
024 * 2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is
025 * done by the third-party program.
026 *
027 * How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex'
028 *
029 * @author Johan Hall
030 */
031 public class MaltParserService {
032 private URL urlMaltJar;
033 private Engine engine;
034 private FlowChartInstance flowChartInstance;
035 private DataFormatInstance dataFormatInstance;
036 private SingleMalt singleMalt;
037 private int optionContainer;
038 private boolean initialized = false;
039
040 /**
041 * Creates a MaltParserService with the option container 0
042 *
043 * @throws MaltChainedException
044 */
045 public MaltParserService() throws MaltChainedException {
046 this(0);
047 }
048
049 /**
050 * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program
051 * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only
052 * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers.
053 *
054 * @param optionContainer an integer from 0 to max value of data type Integer
055 * @throws MaltChainedException
056 */
057 public MaltParserService(int optionContainer) throws MaltChainedException {
058 initialize();
059 setOptionContainer(optionContainer);
060 }
061
062 /**
063 * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
064 *
065 * @param commandLine a commandLine string that controls the MaltParser.
066 * @throws MaltChainedException
067 */
068 public void runExperiment(String commandLine) throws MaltChainedException {
069 OptionManager.instance().parseCommandLine(commandLine, optionContainer);
070 engine = new Engine();
071 engine.initialize(optionContainer);
072 engine.process(optionContainer);
073 engine.terminate(optionContainer);
074 }
075
076 /**
077 * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
078 *
079 * @param commandLine a commandLine string that controls the MaltParser
080 * @throws MaltChainedException
081 */
082 public void initializeParserModel(String commandLine) throws MaltChainedException {
083 OptionManager.instance().parseCommandLine(commandLine, optionContainer);
084 // Creates an engine
085 engine = new Engine();
086 // Initialize the engine with option container and gets a flow chart instance
087 flowChartInstance = engine.initialize(optionContainer);
088 // Runs the preprocess chart items of the "parse" flow chart
089 if (flowChartInstance.hasPreProcessChartItems()) {
090 flowChartInstance.preprocess();
091 }
092 singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt");
093 singleMalt.getConfigurationDir().initDataFormat();
094 dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(
095 singleMalt.getSymbolTables(),
096 OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value"),
097 OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label"));
098 initialized = true;
099 }
100
101 /**
102 * Parses an array of tokens and returns a dependency structure.
103 *
104 * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel().
105 *
106 * @param tokens an array of tokens
107 * @return a dependency structure
108 * @throws MaltChainedException
109 */
110 public DependencyStructure parse(String[] tokens) throws MaltChainedException {
111 if (!initialized) {
112 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
113 }
114 if (tokens == null || tokens.length == 0) {
115 throw new MaltChainedException("Nothing to parse. ");
116 }
117
118 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
119
120 for (int i = 0; i < tokens.length; i++) {
121 Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
122 DependencyNode node = outputGraph.addDependencyNode(i+1);
123 String[] items = tokens[i].split("\t");
124 for (int j = 0; j < items.length; j++) {
125 if (columns.hasNext()) {
126 ColumnDescription column = columns.next();
127 if (column.getCategory() == ColumnDescription.INPUT && node != null) {
128 outputGraph.addLabel(node, column.getName(), items[j]);
129 }
130 }
131 }
132 }
133 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
134 // Invoke parse with the output graph
135 singleMalt.parse(outputGraph);
136 return outputGraph;
137 }
138
139 /**
140 * Terminates the parser model.
141 *
142 * @throws MaltChainedException
143 */
144 public void terminateParserModel() throws MaltChainedException {
145 // Runs the postprocess chart items of the "parse" flow chart
146 if (flowChartInstance.hasPostProcessChartItems()) {
147 flowChartInstance.postprocess();
148 }
149
150 // Terminate the flow chart with an option container
151 engine.terminate(optionContainer);
152 }
153
154 private void initialize() throws MaltChainedException {
155 if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
156 return; // OptionManager is already initialized
157 }
158 String maltpath = getMaltJarPath();
159 if (maltpath == null) {
160 new MaltChainedException("malt.jar could not be found. ");
161 }
162 urlMaltJar = Util.findURL(maltpath);
163 try {
164 OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml"));
165
166 } catch (MalformedURLException e) {
167 throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e);
168 }
169 OptionManager.instance().generateMaps();
170 }
171
172
173 /**
174 * Returns the option container index
175 *
176 * @return the option container index
177 */
178 public int getOptionContainer() {
179 return optionContainer;
180 }
181
182 private void setOptionContainer(int optionContainer) {
183 this.optionContainer = optionContainer;
184 }
185
186 /**
187 * Returns the path of malt.jar file
188 *
189 * @return the path of malt.jar file
190 */
191 public static String getMaltJarPath() {
192 if (SystemInfo.getMaltJarPath() != null) {
193 return SystemInfo.getMaltJarPath().toString();
194 }
195 return null;
196 }
197
198
199 }