/* ------------------------------------------------------------------------- */ /* */ /* D I C T O T A L S U P P O R T T R E E */ /* */ /* Frans Coenen */ /* */ /* 21 March 2001 */ /* (Revise: 20/7/2006, 19/10/2006) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* ------------------------------------------------------------------------- */ /* Structure: AssocRuleMining | +-- TotalSupportTree | +-- DIC_Ttree */ // Dept Computer Science, University of Liverpool // java -mx240000000 DataMiningApp FileName Cols Rowes /* Java packages */ import java.io.*; import java.util.*; /* Java GUI packages */ import javax.swing.*; class DIC_Ttree extends TotalSupportTree { // ------------------- FIELDS ------------------------ // Data structures /** The reference to start of the DIC T-tree. */ private DIC_TtreeNode[] startDIC_TtreeRef = null; // Fields /** Number of records in a block for the DIC algorithm in terms of a percentage of the total number of records avialble, 25% by default. */ private double blockSize = 25.0; /** Number of records in a block for the DIC algorithm. */ private int numRowsInBlock = 0; /** Total number of blocks, set to zero by default. */ private int totalBlocks = 0; /** Counter for number of passes through data set. */ private int numPassesThroughDataSet=1; /** Strat index for current block. */ private int startIndex = 0; /** Flag set to 0 if not at end of block. */ private boolean notAtEnd = true; /** Instance of JTextArea class. */ private JTextArea textArea = null; /*--------------------------------------------------------------------------*/ /* */ /* CONSTRUCTORS */ /* */ /*--------------------------------------------------------------------------*/ /** One argument constructor with command line arguments to be process. @param armInstance the given instance of the AssocRuleMining class. */ public DIC_Ttree(AssocRuleMining armInstance) { super(armInstance); } /*--------------------------------------------------------------------------*/ /* */ /* T-TREE BUILDING METHODS */ /* */ /*--------------------------------------------------------------------------*/ /* CREATE TOTAL SUPPORT TREE */ /** Commences start process of generating a total support tree (T-tree). */ public void createTotalSupportTree() { System.out.println("DIC WITH T-TREE\n----------------"); // Calculate minimum support in terms of number of rows System.out.println("Minimum support threshold = " + twoDecPlaces(support) + "% " + "(" + twoDecPlaces(minSupport) + " records)"); System.out.println("Block size = " + twoDecPlaces(blockSize) + "%"); System.out.println("Block size = " + numRowsInBlock + " records"); System.out.println("Num. blocks = " + totalBlocks); // Generate tree createTotalSupportTree2(); // Determine maximum number of levels in the DIC T-tree (for output // purposes). numLevelsInTtree = getMaxNumTtreeLevels(); System.out.println("Number of passes through data set = " + numPassesThroughDataSet); } /** Commences start process of generating a total support tree (T-tree), GUI version. @param tArea the given instance of the JTextArea class. */ public void createTotalSupportTree(JTextArea tArea) { textArea = tArea; // Calculate minimum support in terms of number of rows textArea.append("Minimum support threshold = " + twoDecPlaces(support) + "% " + "(" + twoDecPlaces(minSupport) + " records)\n"); textArea.append("Block size = " + twoDecPlaces(blockSize) + "%\n"); textArea.append("Block size = " + numRowsInBlock + " records\n"); textArea.append("Num. blocks = " + totalBlocks + "\n"); // Generate tree createTotalSupportTree2(); // Determine maximum number of levels in the DIC T-tree (for output // purposes). numLevelsInTtree = getMaxNumTtreeLevels(); textArea.append("Number of passes through data set = " + numPassesThroughDataSet + "\n"); } /* CREATE TOTAL SUPPORT TREE 2 */ /** Continues start process of generating a total support tree (T-tree).

Overides similar method in parent class. */ private void createTotalSupportTree2() { // Set number of t-tree nodes to zero (this is a static field so will // not be reset in repeat calls to the T-tree constructor). DIC_TtreeNode.setNumberOfNodesFieldToZero(); // If no data (possibly as a result of an order and pruning operation) // return if (numOneItemSets==0) return; // Initilise DIC T-tree data structure and diagnostic counters. startDIC_TtreeRef = null; numFrequentSets = 0; numUpdates = 0l; double time1 = (double) System.currentTimeMillis(); // Create Top level of DIC T-tree (First pass of dataset). createTtreeTopLevel(); // Procced dicAlgorithm(); } /* CREATE T-TREE TOP LEVEL */ /** Generates level 1 (top) of the T-tree.

Overides method in parent class. */ protected void createTtreeTopLevel() { // Dimension and initialise top level of T-tree startDIC_TtreeRef = new DIC_TtreeNode[numOneItemSets+1]; for (int index=1;index<=numOneItemSets;index++) startDIC_TtreeRef[index] = new DIC_TtreeNode(); } /* DIC ALGORITHM */ /** Procced with DIC algorithm. */ private void dicAlgorithm() { // Loop untill all item sets have been counted while (notAtEnd) { // Read first block of records readBlock(); // Update settings notAtEnd = false; updateSettings(startDIC_TtreeRef); } } /* READ BLOCK */ /** Reads a blocvk of data. */ private void readBlock() { // Calculate end index int endIndex = startIndex+numRowsInBlock; // Check end index and adjust if required if (endIndex > numRows) endIndex=numRows; // Loop through block of records for (int index=startIndex;index= linkRef.length) break; // If item set not null if (linkRef[itemSet[index]] != null) { // If item set not fully counted update if (linkRef[itemSet[index]].blockCount <= totalBlocks) { linkRef[itemSet[index]].support++; numUpdates++; } // If child node proceed down child node if (linkRef[itemSet[index]].childRef != null) updateTtree(itemSet,linkRef[itemSet[index]].childRef); } } } /*------------------------------------------------------------------------*/ /* */ /* UPDATE DIC T-TREE SETTINGS */ /* */ /*------------------------------------------------------------------------*/ /* UPDATE SETTINGS */ /** Updates the DIC T-tree settings by passing through T-tree and update status field as follows: if (0 and supported) status=1 generate new level with X-check. If new nodes added set notAtEnd flag to "true" (Flag set to false at start of method). if (all counted) if (0) status=2 if (1) status=3 else set notAtEnd flag to "true". @param linkRef the reference to the currebt location in the DIC Ttree. */ private void updateSettings(DIC_TtreeNode[] linkRef) { // Walk tree for (int index=1; index < linkRef.length; index++) { // Test for null node (node representing unsupported item set) if (linkRef[index] != null) { //Proceed down child branch if it exists if (linkRef[index].childRef != null) updateSettings(linkRef[index].childRef); // Consider generating new level else generateNewLevel(linkRef,index); // Update settings updateNodeSettings(linkRef,index); } } } /* GENERATE NEW LEVEL: */ /** Generate a new level in the DIC Ttree if no current new level exists (tested for in calling method) and node is a supported "possible small item set". */ private void generateNewLevel(DIC_TtreeNode[] linkRef, int index) { if (linkRef[index].status==0 && linkRef[index].support>=minSupport) { // Upgrade status to "possible supported set" linkRef[index].status = 1; // Generate next level in tree, if index is not 1 if (index!=1) { // Create array for next level linkRef[index].childRef = new DIC_TtreeNode[index]; // Loop through array creating nodes for(int index1=1;index1= minSupport) { short[] itemSetSoFar = new short[1]; itemSetSoFar[0] = index; generateARs(itemSetSoFar,index, startDIC_TtreeRef[index].childRef); } } } } /* GENERATE ASSOCIATION RULES */ /** Continues process of generating association rules from a DIC T-tree by recursively looping through T-tree level by level. @param itemSetSofar the label for a DIC T-tree node as generated sofar. @param size the length/size of the current array level in the DIC T-tree. @param linkRef the reference to the current array level in the DIC T-tree. */ protected void generateARs(short[] itemSetSofar, int size, DIC_TtreeNode[] linkRef) { // If no more nodes return if (linkRef == null) return; // Otherwise process for (int index=1; index < size; index++) { if (linkRef[index] != null) { if (linkRef[index].support >= minSupport) { // Temp itemset short[] tempItemSet = realloc2(itemSetSofar,(short) index); // Generate ARs for current large itemset generateARsFromItemset(tempItemSet,linkRef[index].support); // Continue generation process generateARs(tempItemSet,index,linkRef[index].childRef); } } } } /*---------------------------------------------------------------------- */ /* */ /* T-TREE SEARCH METHODS */ /* */ /*---------------------------------------------------------------------- */ /* GET SUPPORT FOT ITEM SET IN T-TREE */ /** Commences process for finding the support value for the given item set in the DIC T-tree (which is know to exist in the DIC T-tree).

Used when generating Association Rules (ARs). Note that itemsets are stored in reverse order in the T-tree therefore the given itemset must be processed in reverse. @param itemSet the given itemset. @return returns the support value (0 if not found). */ protected int getSupportForItemSetInTtree(short[] itemSet) { int endInd = itemSet.length-1; // Last element of itemset in Ttree (Note: Ttree itemsets stored in // reverse) if (startDIC_TtreeRef[itemSet[endInd]] != null) { // If "current index" is 0, then this is the last element (i.e the // input is a 1 itemset) and therefore item set found if (endInd == 0) return(startDIC_TtreeRef[itemSet[0]].support); // Otherwise continue down branch else { DIC_TtreeNode[] tempRef = startDIC_TtreeRef[itemSet[endInd]].childRef; if (tempRef != null) return(getSupForIsetInTtree2(itemSet, endInd-1,tempRef)); // No further branch therefore rerurn 0 else return(0); } } // Item set not in Ttree thererfore return 0 else return(0); } /** Returns the support value for the given itemset if found in the DIC T-tree and 0 otherwise.

Operates recursively. @param itemSet the given itemset. @param index the current index in the given itemset. @param linRef the reference to the current DIC T-tree level. @return returns the support value (0 if not found). */ private int getSupForIsetInTtree2(short[] itemSet, int index, DIC_TtreeNode[] linkRef) { // Element at "index" in item set exists in Ttree if (linkRef[itemSet[index]] != null) { // If "current index" is 0, then this is the last element of the // item set and therefore item set found if (index == 0) return(linkRef[itemSet[0]].support); // Otherwise continue provided there is a child branch to follow else if (linkRef[itemSet[index]].childRef != null) return(getSupForIsetInTtree2(itemSet,index-1, linkRef[itemSet[index]].childRef)); else return(0); } // Item set not in Ttree therefore return 0 else return(0); } /*----------------------------------------------------------------------- */ /* */ /* SET METHODS */ /* */ /*----------------------------------------------------------------------- */ /** Set block size and total number of blocks. @param bSize the given block size in terms of a percebntage of the number of available rows. */ public void setBlockSize(double bSize) { blockSize = bSize; // Calculate number of rows to ablock numRowsInBlock = (int) ((double) numRows*(blockSize/100)); // Set total number of blocks totalBlocks = numRows/numRowsInBlock; if ((numRows % numRowsInBlock) != 0) totalBlocks++; } /*----------------------------------------------------------------------- */ /* */ /* GET METHODS */ /* */ /*----------------------------------------------------------------------- */ /** Gets the reference to the start of the DIC R-tree. @return the desired reference to the start of the DIC R-tree. */ public DIC_TtreeNode[] getStartDIC_TtreeRef() { return(startDIC_TtreeRef); } /** Gets the maximum number of DIC T-tree levels. @return the maximum number of levels in the T-tree. */ private int getMaxNumTtreeLevels() { int maxLevel = 0; // Check if (startDIC_TtreeRef==null) return(maxLevel); // Loop int curentLevel=1; for (short index=1;index maxLevel) maxLevel = newLevel; } } // Return return(maxLevel); } /** Continues process if obtaining maximum number of levels in the T-tree. @param currentLevel the current level in the T-tree. @param linkRef the reference to the current level in the current branch of the T-tree. @return the maximum numbersc of levels in the current brabch of the T-tree. */ private int countLevels(int currentLevel, DIC_TtreeNode[] linkRef) { // Check for empty branch/sub-branch. if (linkRef == null) return(currentLevel-1); // Loop through current level of branch/sub-branch. int maxLevel=currentLevel; for (short index=1;index maxLevel) maxLevel = newLevel; } } // Return return(maxLevel); } /* GET NUMBER OF FREQUENT SETS */ /** Returns number of frequent/large (supported) sets in T-tree. @return the number of support sets. */ public int getNumFreqSets() { return(countNumFreqSets()); } /* --------------------------------------------------------------------- */ /* */ /* OUTPUT METHODS */ /* */ /* --------------------------------------------------------------------- */ /* Nine output options: (1) Output DIC T-tree (2) Output frequent sets (also GUI versions) (3) Output number of frequent sets (4) Output number of updates (4) DIC T-tree statistics (6) Output DIC T-tree storage */ /* ---------------- */ /* 1. OUTPUT T-TRRE */ /* ---------------- */ /* OUTPUT DIC T-TREE */ /** Commences outputs of the DIC T-tree structure */ public void outputDIC_Ttree() { int number = 1; // Start String s = "DIC T-TREE:\n-------\nFormat: [N] {I} = S, where N is " + "the node number, I is the item set and S the support.\n"; if (textArea==null) System.out.println(s); else textArea.append(s); // Check if (startDIC_TtreeRef==null) { if (textArea==null) System.out.println("Ttree empty!"); else textArea.append("Ttree empty!\n"); return; } // Loop for (short index=1;index Operates in a recursive manner. @param number the ID number of a particular node. @param itemSetSofar the label for a T-tree node as generated sofar. @param linkRef the reference to the current array level in the T-tree. */ private void outputDIC_Ttree(String number, short[] itemSetSofar, DIC_TtreeNode[] linkRef) { // Set output local variables. int num=1; number = number + "."; // Check for empty branch/sub-branch. if (linkRef == null) return; // Loop through current level of branch/sub-branch. for (short index=1;index= minSupport) { short[] itemSetSofar = new short[1]; itemSetSofar[0] = index; outputDIC_TtreeNode(new Integer(number).toString(), itemSetSofar,startDIC_TtreeRef[index].support); number = outputFrequentSets(number+1,itemSetSofar, index,startDIC_TtreeRef[index].childRef); } } } // End System.out.println("\n"); } /** Outputs DIC T-tree frequent sets.

Operates in a recursive manner. @param number the number of frequent sets so far. @param itemSetSofar the label for a T-treenode as generated sofar. @param size the length/size of the current array level in the T-tree. @param linkRef the reference to the current array level in the T-tree. @return the incremented (possibly) number the number of frequent sets so far. */ private int outputFrequentSets(int number, short[] itemSetSofar, int size, DIC_TtreeNode[] linkRef) { // No more nodes if (linkRef == null) return(number); // Otherwise process for (short index=1; index < size; index++) { if (linkRef[index] != null) { if (linkRef[index].support >= minSupport) { short[] newItemSet = realloc2(itemSetSofar,index); outputDIC_TtreeNode(new Integer(number).toString(), newItemSet,linkRef[index].support); number = outputFrequentSets(number + 1,newItemSet,index, linkRef[index].childRef); } } } // Return return(number); } /* OUTPUT FREQUENT SETS */ /** Commences the process of outputting the frequent sets contained in the DIC T-tree as series of schema labels. */ public void outputFrequentSetsSchema() { int number = 1; String s = "Format: [N] {I} = S, where N is a sequential " + "number, I is the item set and S the support.\n"; if (textArea==null) System.out.print(s); else textArea.append(s); // Loop for (short index=1; index <= numOneItemSets; index++) { if (startDIC_TtreeRef[index] !=null) { if (startDIC_TtreeRef[index].support >= minSupport) { short[] itemSetSofar = new short[1]; itemSetSofar[0] = index; outputDIC_TtreeNodeSchema(new Integer(number).toString(), itemSetSofar,startDIC_TtreeRef[index].support); number = outputFrequentSetsSchema(number+1,itemSetSofar, index,startDIC_TtreeRef[index].childRef); } } } // End System.out.println("\n"); } /** Outputs DIC T-tree frequent sets.

Operates in a recursive manner. @param number the number of frequent sets so far. @param itemSetSofar the label for a T-treenode as generated sofar. @param size the length/size of the current array level in the T-tree. @param linkRef the reference to the current array level in the T-tree. @return the incremented (possibly) number the number of frequent sets so far. */ private int outputFrequentSetsSchema(int number, short[] itemSetSofar, int size, DIC_TtreeNode[] linkRef) { // No more nodes if (linkRef == null) return(number); // Otherwise process for (short index=1; index < size; index++) { if (linkRef[index] != null) { if (linkRef[index].support >= minSupport) { short[] newItemSet = realloc2(itemSetSofar,index); outputDIC_TtreeNodeSchema(new Integer(number).toString(), newItemSet,linkRef[index].support); number = outputFrequentSetsSchema(number + 1,newItemSet, index,linkRef[index].childRef); } } } // Return return(number); } /* ------------------------------ */ /* 3. OUTPUT NUMBER FREQUENT SETS */ /* ------------------------------ */ /** Commences the process of counting and outputing number of supported nodes in the T-tree.

A supported set is assumed to be a non null node in the T-tree. */ public void outputNumFreqSets() { // If empty tree (i.e. no supported sets) do nothing if (startDIC_TtreeRef== null) { String s = "Number of frequent sets = 0\n"; if (textArea==null) System.out.println(s); else textArea.append(s); } // Otherwise count and output else { String s = "Number of frequent sets = " + countNumFreqSets() + "\n"; if (textArea==null) System.out.println(s); else textArea.append(s); } } /* COUNT NUMBER OF FRQUENT SETS */ /** Commences process of counting the number of frequent (large/supported) sets contained in the DIC T-tree. */ protected int countNumFreqSets() { // If empty tree return 0 if (startDIC_TtreeRef == null) return(0); // Otherwise loop through T-tree starting with top level int num=0; for (int index=1; index <= numOneItemSets; index++) { // Check for null valued top level Ttree node. if (startDIC_TtreeRef[index] !=null) { if (startDIC_TtreeRef[index].support >= minSupport) num = countNumFreqSets(index, startDIC_TtreeRef[index].childRef,num+1); } } // Return return(num); } /** Counts the number of supported nodes in a sub branch of the DIC T-tree. @param size the length/size of the current array level in the DIC T-tree. @param linkRef the reference to the current array level in the DIC T-tree. @param num the number of frequent sets sofar. */ protected int countNumFreqSets(int size, DIC_TtreeNode[] linkRef, int num) { if (linkRef == null) return(num); for (int index=1; index < size; index++) { if (linkRef[index] != null) { if (linkRef[index].support >= minSupport) num = countNumFreqSets(index, linkRef[index].childRef,num+1); } } // Return return(num); } /* --------------------------- */ /* 4. OUTPUT NUMBER OF UPDATES */ /* --------------------------- */ /* OUTPUT NUMBER UPDATES */ /** Outputs the number of update and number of nodes created during the generation of the T-tree (the later is not the same as the number of supported nodes). */ public void outputNumUpdates() { System.out.println("Number of DIC T-tree nodes created = " + DIC_TtreeNode.getNumberOfNodes()); System.out.println("Number of DIC T-tree Updates = " + numUpdates); } /* --------------------------- */ /* 5. OUTPUT T-TREE STATISTICS */ /* --------------------------- */ /** Commences the process of outputting T-tree statistics (for diagnostic purposes): (a) Storage, (b) Number of nodes on P-tree, (c) number of partial support increments (updates) and (d) generation time. */ public void outputTtreeStats() { if (textArea==null) { System.out.println("DIC T-TREE STATISTICS\n---------------------"); System.out.println(numLevelsInTtree + " Levels in DIC T-tree"); System.out.println(DIC_TtreeNode.getNumberOfNodes() + " Total # nodes created"); System.out.println(numUpdates + " Total # support value " + "increments"); System.out.println(countNumFreqSets() + " Final # frequent sets"); System.out.println(calculateStorage() + " Total storage (Bytes)" + " on completion"); System.out.println("-----------------------------------"); } else outputTtreeStats(textArea); } /** Commences the process of outputting T-tree statistics (GUI version). @param textArea the given instance of the JTextArea class. */ public void outputTtreeStats(JTextArea textArea) { textArea.append("DIC T-TREE STATISTICS\n---------------------\n"); textArea.append(numLevelsInTtree + " Levels in DIC T-tree\n"); textArea.append(DIC_TtreeNode.getNumberOfNodes() + " Total # nodes " + "created\n"); textArea.append(numUpdates + " Total # support value increments\n"); textArea.append(countNumFreqSets() + " Final # frequent sets\n"); textArea.append(calculateStorage() + " Total storage (Bytes)" + " on completion\n"); textArea.append("-----------------------------------\n"); } /* ----------------- */ /* 6. OUTPUT STORAGE */ /* ----------------- */ /** Commences the process of determining and outputting the storage requirements (in bytes) for the T-tree. */ public void outputStorage() { // If empty tree do nothing if (startDIC_TtreeRef == null) return; /* Otherwise calculate storage */ String s = "T-tree Storage = " + calculateStorage() + " (Bytes)\n"; if (textArea==null) System.out.print(s); else textArea.append(s); } /* CALCULATE STORAGE */ /** Commences process of calculating storage requirements for T-tree. */ protected int calculateStorage() { // If emtpy tree (i.e. no supported sets) return 0 if (startDIC_TtreeRef == null) return(0); /* Step through top level */ int storage = 4; // For element 0 for (int index=1; index <= numOneItemSets; index++) { if (startDIC_TtreeRef[index] !=null) storage = storage + 18 + calculateStorage(0,startDIC_TtreeRef[index].childRef); else storage = storage+4; } // Return return(storage); } /** Calculate storage requirements for a sub-branch of the DIC T-tree. @param localStorage the storage as calculated sofar (set to 0 at start). @param linkRef the reference to the current sub-branch of the DIC T-tree. */ private int calculateStorage(int localStorage, DIC_TtreeNode[] linkRef) { if (linkRef == null) return(0); // Loop through current level in current sub-nranch of DIC T-tree for (int index=1; index < linkRef.length; index++) { if (linkRef[index] !=null) localStorage = localStorage + 18 + calculateStorage(0,linkRef[index].childRef); else localStorage = localStorage + 4; } /* Return */ return(localStorage+4); // For element 0 } }