/* -------------------------------------------------------------------------- */ /* */ /* PHRASE MINING DELIMITERS = ORDINARY WORDS AND STOP MARKS, */ /* CONTENTS = SIGNIFICANT AND NOISE WORDS */ /* */ /* Frans Coenen */ /* */ /* Friday 3 February 2006 */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure AssocRuleMining | +-- TextMining | +-- PhraseMining | +-- PhraseMining_DelSO_ContGW */ //package lucsKDD_ARM; // Java packages import java.io.*; import java.util.*; /** Phrase based text classification. Class containing methods to coordinate phrase identification from training set of documents, recast the training and test set in terms of the identified phrases and then produce a classifier from the training set tested on the test set. Phrases defined as follows: Deliminators = stop marks (S) and ordinary words (O). Content = Distinguishing words (G) and wild card words (W). Ignore = Noise words (N) not adjacent to distinguishing (Significant) words. @author Frans Coenen @version 3 February 2006. */ public class PhraseMining_DelSO_ContGW extends PhraseMining { /* ---------------------------------------------------------------- */ /* */ /* FIELDS */ /* */ /* ---------------------------------------------------------------- */ /* NONE */ /* ---------------------------------------------------------------- */ /* */ /* CONSTRUCTORS */ /* */ /* ---------------------------------------------------------------- */ /** Constructor processes command line arguments. @param args the command line arguments (array of String instances). */ public PhraseMining_DelSO_ContGW(String[] args) { super(args); System.out.println("PHRASE MINING\nDelimeters\t = stop marks and " + "ordinary words\nContents\t = at least one significant " + "word and wild cards\n"); } /* ---------------------------------------------------------------- */ /* */ /* METHODS */ /* */ /* ---------------------------------------------------------------- */ /** Creates phrase bin tree where phrase comprises a sequence of words consisting of one or more significant words and any ordinary words delimitted by stop marks or noise words. */ protected void createPhraseBinTree() { docBase.genPhraseBinTree_DelSO_ContGW(phraseBinTree); } }