/* -------------------------------------------------------------------------- */ /* */ /* TOTAL SUPPORT TREE NEGATIVE BORDER */ /* */ /* Frans Coenen */ /* */ /* 10 January 2003 */ /* (Revisions 16/1/2003, 7/2/2003, 14/3/2003, 6/6/2003, 1/7/2006) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class Hierarchy: AssocRuleMining | +-- TotalSupportTree | +-- TotalSupportTreeNegBorder */ //package lucsKDD_ARM; /* Java packages */ import java.io.*; // Java GUI packages import javax.swing.*; /** Methods to implement the "Apriori-T" algorithm using a "Total support" tree data structure (T-tree) without X-checking and with the negative boarder concept such that the leaf nodes represent itemset that are just NOT supported.

In the "standard" T-tree the leaf nodes represent those itemsets which are just supported. Methods in this class are used to implement Toivonen's sampling ARM approach and distributed Apriori-T algorithms. @author Frans Coenen @version 5 July 2003 */ public class TotalSupportTreeNegBorder extends TotalSupportTree { /* ------ FIELDS ------ */ // CONSTANTS /** The start row identifier for a segement. */ protected static int START_SEGMENT = 0; // VARIABLE FIELDS /** Value by which minimum support threshold should be decreased to ensure that no frequent set are omitted. */ protected double supportReductionValue = 0.1; // Percent /** The global minsupport value held while some alternative is temporarily used in connection with the "negative boarder" approach. */ protected double tempMinSupport; /** Percentage of record to be included in preliminary segment. */ private static double PRELIMINARY_SIZE_PRECENTAGE = 0.25; // DATA STRUCTURES /** 2-D array to hold preliminary segement of input data. */ protected short[][] prelimSegDataArray = null; // FLAGS /** Temporary minimum support threshold reduced by one record (used for output only). */ private boolean reducedByOneFlag = false; /** Temporary minimum support threshold set to one record (used for output only). */ private boolean setToOneFlag = false; /* ------ CONSTRUCTORS ------ */ /** Processes command line arguments. */ public TotalSupportTreeNegBorder(String[] args) { super(args); } /** With argument from existing instance of class AssocRuleMining. */ public TotalSupportTreeNegBorder(AssocRuleMining armInstance) { super(armInstance); } /** Default constructor. */ public TotalSupportTreeNegBorder() { } /* ------ METHODS ------ */ /*----------------------------------------------------------------------- */ /* */ /* TREE BUILDING METHODS */ /* (Negative boarder applied to entire tree) */ /* WARNINMG: THIS IS NOT USUALLY DONE SO */ /* PROBABLY SAFE TO IGNRE THIS BIT OF CODING! */ /* */ /*----------------------------------------------------------------------- */ /* CREATE TOTAL SUPPORT TREE */ /** Commences start process of generating a total support tree (T-tree) with negative boarder concept applied to the entire tree.

Overridies parent method. */ public void createTotalSupportTree() { System.out.println("APRIORI-T WITH NEGATIVE BOARDER\n" + "-------------------------------"); System.out.println("Negative boarder concept applied to entire tree."); System.out.println("Minimum support threshold = " + support + "% " + "(" + minSupport + " records)"); // Recalculate minimum support so that it is temporarily lowered to // provide a "safety margin" when using the negative boarder principal double time1 = (double) System.currentTimeMillis(); calcReducedMinSup(); outputMinSupReduction(); // Continue createTotalSupportTree2(); duration = getDuration(time1,(double) System.currentTimeMillis()); } /** Commences start process of generating a total support tree (T-tree) using negative boarder concept applied to the entire tree, GUI version.

Overridies parent method. @param textArea the given instance of the JTextArea class. */ public void createTotalSupportTree(JTextArea textArea) { textArea.append("Negative boarder concept applied to entire tree.\n"); textArea.append("Minimum support threshold = " + support + "% " + "(" + minSupport + " records)\n"); // Recalculate minimum support so that it is temporarily lowered to // provide a "safety margin" when using the negative boarder principal double time1 = (double) System.currentTimeMillis(); calcReducedMinSup(); outputMinSupReduction(textArea); // Continue createTotalSupportTree2(); duration = getDuration(time1,(double) System.currentTimeMillis()); } /** Continues start process of generating a total support tree (T-tree) using negative boarder concept. */ private void createTotalSupportTree2() { // Create Top level of T-tree (First pass of dataset) createTtreeTopLevel(); // Generate level 2 (including negative boarder concept) generateLevel2(); // Further passes of the dataset createTtreeLevelN(); // Parent method // Reset minimum support level minSupport = tempMinSupport; } /*----------------------------------------------------------------------- */ /* */ /* TREE BUILDING METHODS */ /* (Negative boarder applied to segment of tree) */ /* */ /*----------------------------------------------------------------------- */ /* CREATE TOTAL SUPPORT TREE */ /** Commences process of generating a total support tree (T-tree) using negative boarder concept for a given segment of the data. */ public void createTotalSupportTreeNB() { System.out.println("Apriori-T with Negative Border\n" + "------------------------------"); System.out.println("Minimum support threshold = " + support + "% " + "(" + minSupport + " records)"); // Recast input data into a preliminary segment and the remainder. pruneInputData(); System.out.println(prelimSegDataArray.length + " size of start " + "segment (" + (PRELIMINARY_SIZE_PRECENTAGE*100) + "% of total # records)"); // Recalculate minimum support so that it takes into consideration // the reduced number of records due to segmentation and so that it // is temporarily lowered to provide a "safety margin" when using // the negative boarder principal calcReducedMinSup(START_SEGMENT,prelimSegDataArray.length); outputMinSupReduction(); // Create initial negative boarder T-tree createPreliminaryTtreeWithNB(); // Create Top level of T-tree (First pass of dataset) addSupportToTtreeIfExists(); // Output int numFreqSetsOnNB = getNumFreqSetsOnNB(); if (numFreqSetsOnNB>0) System.out.println("WARNING: "); System.out.println("Number of frequent sets on negative boarder = " + numFreqSetsOnNB + "\n"); } /** Commences process of generating a total support tree (T-tree) using negative boarder concept for a given segment of the data. @param textArea the given instance of the JTextArea class. */ public void createTotalSupportTreeNB(JTextArea textArea) { textArea.append("Minimum support threshold = " + support + "% " + "(" + minSupport + " records)\n"); // Recast input data into a preliminary segment and the remainder. pruneInputData(); textArea.append(prelimSegDataArray.length + " size of start " + "segment (" + (PRELIMINARY_SIZE_PRECENTAGE*100) + "% of total # records)\n"); // Recalculate minimum support so that it takes into consideration // the reduced number of records due to segmentation and so that it is // temporarily lowered to provide a "safety margin" when using the // negative boarder principal calcReducedMinSup(START_SEGMENT,prelimSegDataArray.length); outputMinSupReduction(textArea); // Create initial negatice boarder T-tree createPreliminaryTtreeWithNB(); // Create Top level of T-tree (First pass of dataset) addSupportToTtreeIfExists(); // Output int numFreqSetsOnNB = getNumFreqSetsOnNB(); if (numFreqSetsOnNB>0) textArea.append("WARNING: "); textArea.append("Number of frequent sets on negative boarder = " + numFreqSetsOnNB + "\n"); } /* CREATE PRELIMINARY TOTAL SUPPORT TREE */ /** Commences process of generating a preliminary total support tree (T-tree) using negative boarder concept. */ private void createPreliminaryTtreeWithNB() { short[][] tempDataArray = dataArray; dataArray = prelimSegDataArray; // Create Top level of T-tree (First pass of dataset) createTtreeTopLevel(); // Generate level 2 (including negative boarder concept) generateLevel2(); // Further passes of the dataset createTtreeLevelN(); // Reset minimum support level minSupport = tempMinSupport; dataArray = tempDataArray; } /* ADD SUPPORT VALUES TO T-TREE IF NODE EXISTS */ /** Commences process of adding further support values to the nodes in an existing negative boarder T-tree generated for a preliminary data segment. */ protected void addSupportToTtreeIfExists() { // Loop through data set record by record for (int index=0;index Using the negative boarder concept we do not wish to remove un supported nodes at a given level as these represent the negative boarder. Of course when generating the next level new nodes are only added from supported previous nodes. Called by parent methods: (i) createTtreeTopLevel and (ii) createTtreeLevelN. @param linkRef The reference to the current sub-branch of T-tree (start at top of tree) @param level the level marker, set to the required level at the start and then decremented by 1 on each recursion. @return true if all nodes at a given level in the given branch of the T-tree have been pruned, false otherwise. */ protected boolean pruneLevelN(TtreeNode [] linkRef, int level) { return(false); // Default } /* PRUNE T-TREE */ /** Commences process of pruning the final negative boarder T-tree. */ protected void pruneTtree() { pruneTtree(startTtreeRef); } /** Prunes the final negative boarder T-tree.

Operates in a recursive manner. @param linkRef The reference to the current sub-branch of T-tree (start at top of tree). */ protected void pruneTtree(TtreeNode[] linkRef) { // Check for empty branch if (linkRef != null) { // Loop through T-tree branch for(int index=1;index Overrides parent method which does not use the negative boarder concept, therefore if a node does not exist it is not supported. When using the negative boarder two types of node are included in the T-tree: (i) supported nodes as in the standard T-tree and (ii) unsupported nodes representing the negative board. Note also that the general generateLevelN method assumes we have to first find the right level in the T-tree, that is not necessary in this case of level 2. */ protected void generateLevel2() { // Set next level flag nextLevelExists=false; // loop through top level for (int index=2;index= minSupport) generateNextLevel(startTtreeRef,index, realloc2(null,(short) index)); } } /* GENERATE LEVEL N */ /** Commences process of generating remaining levels in the T-tree (other than top and 2nd levels).

Proceeds in a recursive manner level by level until the required level is reached. Example, if we have a T-tree of the form:

    (A) ----- (B) ----- (C)
               |         |
	       |         |
	      (A)       (A) ----- (B)
    

Where all nodes are supported and we wish to add the third level we would walk the tree and attempt to add new nodes to every level 2 node found. Having found the correct level we step through starting from B (we cannot add a node to A), so in this case there is only one node from which a level 3 node may be attached. @param linkRef the reference to the current sub-branch of T-tree (start at top of tree) @param level the level marker, set to the required level at the start and then decremented by 1 on each recursion. @param itemSet the current itemset under consideration. */ protected void generateLevelN(TtreeNode[] linkRef, int level, short[] itemSet) { int localSize = linkRef.length; // Correct level if (level == 1) { // Step through T-tree array in current branch at current level for (int index=2;index= minSupport)) generateNextLevel(linkRef,index, realloc2(itemSet,(short) index)); } } // Wrong level else { for (int index=level;index Overrides parent method which includes X-checking. @param parentRef the reference to the current sub-branch of T-tree (start at top of tree) @param endIndex the last index of the parent level. @param itemSet the complete label representing the current node (not used by this method, but required by the parent method which this method overwrites). */ protected void generateNextLevel(TtreeNode[] parentRef, int endIndex, short[] itemSet) { parentRef[endIndex].childRef = new TtreeNode[endIndex]; // New level // Generate a level in Ttree TtreeNode currentNode = parentRef[endIndex]; // Loop through parent sub-level of siblings up to current node for (int index=1;indexsupportReductionValue or 1 whatever is greater.

Used when generating an entire T-tree with a negative boarder. */ protected void calcReducedMinSup() { reducedByOneFlag = false; setToOneFlag = false; // Store full minimum support tempMinSupport = minSupport; // If min support is already 1 it can not be reduced any further. if (minSupport < 1.00000001) { setToOneFlag = true; return; } // Calculate reduced value minSupport = numRows * (support-supportReductionValue/100.0)/100.0; // Check if reduction is greater than one record, if not adjust new // min support so that it is one less than the full minimum. if ((tempMinSupport-minSupport) < 1.0) { minSupport = tempMinSupport-1; reducedByOneFlag = true; } // check that reduction is not less than one record, otherwise set to // one if (minSupport<1.0) { minSupport=1.0; reducedByOneFlag = false; setToOneFlag = true; } } /** Recalculates minimum support so that it is temporarily lowered, for a given segment, by the value of the constant supportReductionValue or 1 whatever is greater.

Used when applying Toivonen's negative boarder ARM method. @param start the record number marking the start of the segment. @param end the record number marking the end of the segment. */ protected void calcReducedMinSup(int start, int end) { reducedByOneFlag = false; setToOneFlag = false; // Store full minimum support tempMinSupport = minSupport; // If min support is already 1 it can not be reduced any further. if (minSupport < 1.00000001) { setToOneFlag = true; return; } // Calculate reduced value. Support is a percentage, reduction value is // also a percentage. So if reduction value is 50% and support is 1% // then support should be reduced to 0.5%. Min support is expressed in // terms of a number of records. int numRows = end-start+1; minSupport = numRows * (support-supportReductionValue/100.0)/100.0; // Check if reduction is greater than one record, if not adjust new // min support so that it is one less than the full minimum. if ((tempMinSupport-minSupport) < 1.0) { minSupport = tempMinSupport-1; reducedByOneFlag = true; } // It does not make sense to reduce the minimum support to less than // one record, so check if min support is greater than one, if not // adjust new min support so that it is set to one. if (minSupport < 1.0) { minSupport = 1.0; reducedByOneFlag = false; setToOneFlag = true; } } /* PRUNE INPUT DATA */ /** Prunes input data set to give a preliminary set (used o determine T-tree with negative boarder), and a revised data set with out the preliminary set (used to determine additional support values). */ private void pruneInputData() { // Dimension dataset: (i) Calculate size. int sizeOfPrelimDataSet = 1 + (int) ((double) numRows* PRELIMINARY_SIZE_PRECENTAGE); prelimSegDataArray = new short[sizeOfPrelimDataSet][]; // Copy preliminary data. int index1; for (index1=0;index1= minSupport) { if (startTtreeRef[index].childRef == null) num=num+1; else num = getNumFreqSetsOnNB(index, startTtreeRef[index].childRef,num); } } } // Return return(num); } /** Counts the number of supported nodes in a sub branch of the T-tree. @param size the length/size of the current array level in the T-tree. @param linkRef the reference to the current array level in the T-tree. @param num the number of frequent sets sofar. */ private int getNumFreqSetsOnNB(int size, TtreeNode[] linkRef, int num) { // Loop through array for (int index=2;index= minSupport) { if (linkRef[index].childRef == null) num=num+1; else num = getNumFreqSetsOnNB(index,linkRef[index].childRef, num); } } } // Return return(num); } /** Outputs stored input data for the preliminary segment of the input data. */ protected void outputPrelimSegDataArray() { for(int index=0;indexJTextArea class. */ private void outputMinSupReduction(JTextArea textArea) { if (reducedByOneFlag) textArea.append("Segment minimum " + "support threshold = " + ((int) minSupport) + " records (reduced by 1)\n"); else if (setToOneFlag) textArea.append("Segment minimum " + "support threshold = " + ((int) minSupport) + " records (set to 1, can not go any lower)\n"); else textArea.append("Segment minimum support threshold = " + ((int) minSupport) + " records (support " + "reduced by " + supportReductionValue+ "%)\n"); } }