import com.aliasi.classify.Classification; import com.aliasi.classify.Classifier; import com.aliasi.classify.ClassifierEvaluator; import com.aliasi.classify.ConditionalClassification; import com.aliasi.classify.DynamicLMClassifier; import com.aliasi.classify.JointClassification; import com.aliasi.lm.NGramProcessLM; import com.aliasi.util.BoundedPriorityQueue; import com.aliasi.util.Files; import com.aliasi.util.Scored; import com.aliasi.util.ScoredObject; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.util.Iterator; public class PolarityHierarchical { File mPolarityDir; String[] mCategories; DynamicLMClassifier mClassifier; Classifier mSubjectivityClassifier; PolarityHierarchical(String[] args) throws ClassNotFoundException, IOException { System.out.println("\nHIERARCHICAL POLARITY DEMO"); mPolarityDir = new File(args[0],"txt_sentoken"); System.out.println("\nData Directory=" + mPolarityDir); mCategories = mPolarityDir.list(); int nGram = 8; mClassifier = DynamicLMClassifier .createNGramProcess(mCategories,nGram); File modelFile = new File("subjectivity.model"); System.out.println("\nReading Compiled Model from file=" + modelFile); FileInputStream fileIn = new FileInputStream(modelFile); ObjectInputStream objIn = new ObjectInputStream(fileIn); @SuppressWarnings("unchecked") Classifier subjectivityClassifier = (Classifier) objIn.readObject(); mSubjectivityClassifier = subjectivityClassifier; objIn.close(); } void run() throws ClassNotFoundException, IOException { train(); evaluate(); } boolean isTrainingFile(File file) { return file.getName().charAt(2) != '9'; // test on fold 9 } void train() throws IOException { int numTrainingCases = 0; int numTrainingChars = 0; System.out.println("\nTraining."); for (int i = 0; i < mCategories.length; ++i) { String category = mCategories[i]; File file = new File(mPolarityDir,mCategories[i]); File[] trainFiles = file.listFiles(); for (int j = 0; j < trainFiles.length; ++j) { File trainFile = trainFiles[j]; if (isTrainingFile(trainFile)) { ++numTrainingCases; String review = Files.readFromFile(trainFile,"ISO-8859-1"); numTrainingChars += review.length(); mClassifier.train(category,review); } } } System.out.println(" # Training Cases=" + numTrainingCases); System.out.println(" # Training Chars=" + numTrainingChars); // if you want to write the polarity model out for future use, // uncomment the following line // com.aliasi.util.AbstractExternalizable.compileTo(mClassifier,new File("polarity.model")); } void evaluate() throws IOException { ClassifierEvaluator evaluator = new ClassifierEvaluator(null,mCategories); for (int i = 0; i < mCategories.length; ++i) { String category = mCategories[i]; File file = new File(mPolarityDir,mCategories[i]); File[] trainFiles = file.listFiles(); for (int j = 0; j < trainFiles.length; ++j) { File trainFile = trainFiles[j]; if (!isTrainingFile(trainFile)) { String review = Files.readFromFile(trainFile,"ISO-8859-1"); String subjReview = subjectiveSentences(review); Classification classification = mClassifier.classify(subjReview); evaluator.addClassification(category,classification); } } } System.out.println(); System.out.println(evaluator.toString()); } String subjectiveSentences(String review) { String[] sentences = review.split("\n"); BoundedPriorityQueue> pQueue = new BoundedPriorityQueue>(ScoredObject.comparator(), MAX_SENTS); for (int i = 0; i < sentences.length; ++i) { String sentence = sentences[i]; ConditionalClassification subjClassification = (ConditionalClassification) mSubjectivityClassifier.classify(sentences[i]); double subjProb; if (subjClassification.category(0).equals("quote")) subjProb = subjClassification.conditionalProbability(0); else subjProb = subjClassification.conditionalProbability(1); pQueue.offer(new ScoredObject(sentence,subjProb)); } StringBuilder reviewBuf = new StringBuilder(); Iterator> it = pQueue.iterator(); for (int i = 0; it.hasNext(); ++i) { ScoredObject so = it.next(); if (so.score() < .5 && i >= MIN_SENTS) break; reviewBuf.append(so.getObject() + "\n"); } String result = reviewBuf.toString().trim(); return result; } static int MIN_SENTS = 5; static int MAX_SENTS = 25; public static void main(String[] args) { try { new PolarityHierarchical(args).run(); } catch (Throwable t) { System.out.println("Thrown: " + t); t.printStackTrace(System.out); } } }