/* -------------------------------------------------------------------------- */ /* */ /* TEST SET DOCUMENT BASE */ /* */ /* Frans Coenen */ /* */ /* Wednesday 21 December 2005 */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure SetDocumentBase | +-- TestSetDocumentBase */ //package lucsKDD_ARM; /** Class containing methods to describe a set of test documents in terms of identified phrases (those contained in the phrase bin tree). @author Frans Coenen @version 21 December 2005. */ public class TestSetDocumentBase extends SetDocumentBase { /* ------------------------------- */ /* */ /* FIELDS */ /* */ /* ------------------------------- */ /** String array to store word content for a single test set document. */ private String[] testSetDoc = null; /** Current index into testSetDoc. */ private int currentIndex = 0; /** Test set data array to hold entire test set in the form of a set of attribute reference numbers. */ private short[][] testDataArray = null; /* ------------------------------------ */ /* */ /* CONSTRUCTORS */ /* */ /* ------------------------------------ */ /** One argument constructor. @param numberOfDocs the number of documents in the test set document base. */ public TestSetDocumentBase(int numberOfDocs) { // initialise data array. testDataArray = new short[numberOfDocs][]; for(int index=0;index/* NOTE(review): text appears to be missing at this point (rest of the loop header, the loop body and the opening of the following Javadoc comment); restore from the original source file. */ Test set document array will eventually contain all the words represented in a test set document. @param word the given word. */ public void addWord(String word) { testSetDoc[currentIndex] = word; currentIndex++; } /* ----------------------------------------------------- */ /* */ /* FIND PHRASE IN TEST DATA ARRAY */ /* */ /* ----------------------------------------------------- */ /** Finds given phrase in test set document array of strings.

Used to generate test set data (attribute number) array. @param phrase the given phrase. @param sigWord the first significant word in the phrase. @param indexSigWord the index of the first significant word in the phrase. @param docNum the current document ID number. @param attNum the current attribute (phrase) ID number. */ public void findInTestSetDoc(String[] phrase, String sigWord, int indexSigWord, int docNum, short attNum) { // Find all occurrences (indexes) of first significant word in phrase in // test set document. int[] indexesFound = findSigWordInTestSetDoc(sigWord); // If no occurrences found return. if (indexesFound==null) return; // Otherwise for each occurrence check if rest of phrase present. for (int index=0;index/* NOTE(review): text appears to be missing at this point (rest of this loop and the start of the phrase-checking code that the following statements belong to); restore from the original source file. */=(testSetDoc.length-1)) return(PHRASE_NOT_FOUND); // Otherwise compare phrase word with test set document if (!phrase[index].equals(testSetDoc[offset])) return(PHRASE_NOT_FOUND); } } // Increment offset offset++; } //Return return(PHRASE_FOUND); } /** Adds phrase attribute to data array. document number to obtain the array index. @param docNumber the given document number. @param attNum the current attribute number. */ public void addToDataArray(int docNumber, short attNum) { int length = testDataArray[docNumber].length; short[] tempDataArray = new short[length+1]; int index = 0; // Add attribute number to records in data array, first copy // record, then add attribute number, and then reassign. for (;index/* NOTE(review): text appears to be missing at this point (rest of the copy loop, the remainder of this method and the opening of the following Javadoc comment); restore from the original source file. */ Used to generate test set data array. @param keyWord the given key word. @param docNum the current document ID number. @param attNum the current attribute (phrase) ID number. */ public void findInTestSetDoc(String keyWord, int docNum, short attNum) { // Process test set document for (int index=0;index