import com.aliasi.chunk.CharLmRescoringChunker; import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; import com.aliasi.tokenizer.TokenizerFactory; import com.aliasi.util.AbstractExternalizable; import com.aliasi.util.Streams; import java.io.File; import java.io.IOException; public class TrainConll2002 { static final int NUM_CHUNKINGS_RESCORED = 64; static final int MAX_N_GRAM = 12; static final int NUM_CHARS = 256; static final double LM_INTERPOLATION = MAX_N_GRAM; // default behavior static final boolean SMOOTH_TAGS = true; // java TrainGeneTag public static void main(String[] args) throws IOException { File modelFile = new File(args[0]); File trainFile = new File(args[1]); File devFile = new File(args[2]); System.out.println("Setting up Chunker Estimator"); TokenizerFactory factory = IndoEuropeanTokenizerFactory.INSTANCE; CharLmRescoringChunker chunkerEstimator = new CharLmRescoringChunker(factory, NUM_CHUNKINGS_RESCORED, MAX_N_GRAM, NUM_CHARS, LM_INTERPOLATION, SMOOTH_TAGS); System.out.println("Setting up Data Parser"); Conll2002ChunkTagParser parser = new Conll2002ChunkTagParser(); parser.setHandler(chunkerEstimator); System.out.println("Training with Data from File=" + trainFile); parser.parse(trainFile); System.out.println("Training with Data from File=" + devFile); parser.parse(devFile); System.out.println("Compiling and Writing Model to File=" + modelFile); AbstractExternalizable.compileTo(chunkerEstimator,modelFile); } }