/* -------------------------------------------------------------------------- */ /* */ /* FP T R E E C L A S S */ /* */ /* Frans Coenen */ /* */ /* 25 May 2001 */ /* (Revised 5 February 2003, 3 February 2006) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Structure: AssocRuleMining | +-- TotalSupportTree | +-- FPtree */ //package lucsKDD_ARM; import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; /** Implementation of Han's FP-growth ARM algorithm. @author Frans Coenen @version 5 February 2003 */ public class FPtree extends TotalSupportTree { /* ------ FIELDS ------ */ /** FP-tree node structure comprising a FPgrowthItemPrefixSubtreeNode in which to store counts and a reference to a child branch. */ protected class FPtreeNode { /** The FP tree node. */ private FPgrowthItemPrefixSubtreeNode node = null; /** The reference to the child branch (levels in FP-tree branches are stored as a arrays of FPtreeNode structures. */ private FPtreeNode[] childRefs = null; /** Default constructor. */ protected FPtreeNode() { } /** Single argument constructor. @param newNode the reference to a new node to be included in the FP-tree.*/ protected FPtreeNode(FPgrowthItemPrefixSubtreeNode newNode) { node = newNode; } } /** Prefix subtree structure.

A set enumeration tree in which to store itemsets together with support values. */ private class FPgrowthItemPrefixSubtreeNode { /** The attribute identifier. */ private short itemName; /** The support count. */ private int itemCount; /** The backward link to the parent node in FP tree. */ private FPgrowthItemPrefixSubtreeNode parentRef = null; /** The forward link to the next node in a linked list of nodes with same attribute identifier starting with an element in the header table (array). */ private FPgrowthItemPrefixSubtreeNode nodeLink = null; /** Default constructor. */ private FPgrowthItemPrefixSubtreeNode() { } /** Three argument constructor. @param name the itemset identifier. @param support the support value for the itemset. @param backRef the backward link to the parent node. */ private FPgrowthItemPrefixSubtreeNode(short name, int support, FPgrowthItemPrefixSubtreeNode backRef) { itemName = name; itemCount = support; parentRef = backRef; } } /** Header table.

Array of these structures used to link into FP-tree. All FP-tree nodes with the same identifier are linked together starting from a node in a header table (made up of HeaderTasble structures). It is this "cross" linking that gives the FP-tree its most significant advantage. */ protected class FPgrowthHeaderTable { /** The 1-itemset (attribute) identifier. */ protected short itemName; /** The forward link to the next node in the link list of nodes. */ protected FPgrowthItemPrefixSubtreeNode nodeLink = null; // Constructors protected FPgrowthHeaderTable (short columnNum) { itemName = columnNum; } } /** Structure in which to store ancestor itemSets, thus nodes in an FP-tree that preceed the nodes identified by following a trail of links from a particular item in the header table. */ private class FPgrowthSupportedSets { /** The itemSet label. */ private short[] itemSet = null; /** The associated support value for the given itemset. */ private int support; /** The reference to the next node in a linked list. */ private FPgrowthSupportedSets nodeLink = null; /** Three argument constructor. @param newitemSet the given itemSet label. @param newSupport the associated support value for the given itemset. @param newNodeLink the reference to the next node in a linked list. */ private FPgrowthSupportedSets(short[] newitemSet, int newSupport, FPgrowthSupportedSets newNodeLink) { itemSet = newitemSet; support = newSupport; nodeLink = newNodeLink; } } /** Structure in which to store counts. */ private class FPgrowthColumnCounts { /** The column/attribute ID number. */ private short columnNum; /** The associated support value. */ private int support=0; /** One argument constructor. @param column the column/attribute ID number. */ private FPgrowthColumnCounts(int column) { columnNum = (short) column; } /** Two argument constructor. @param column the column/attribute ID number. @param sup the associatec support value. */ private FPgrowthColumnCounts(int column, int sup) { columnNum = (short) column; support = sup; } } // Data structures /** Start reference for FP-tree. */ protected FPtreeNode rootNode = null; /** Start reference for header table. */ protected FPgrowthHeaderTable[] headerTable; /** Start reference for supportedSets linked list (temporary storage only).*/ private static FPgrowthSupportedSets startTempSets = null; // Other fields /** Temporary storage for an index into an array of FP-tree nodes.

Used when reassigning child reference arrays. */ private int tempIndex = 0; /** Number of nodes created. */ private int numberOfNodes; /* ------ CONSTRUCTORS ------ */ /** Constructor to process command line argument. @param args the command line arguments. */ public FPtree(String[] args) { super(args); // Initialise root node rootNode = new FPtreeNode(); // Create header table headerTable = new FPgrowthHeaderTable[numOneItemSets+1]; // Populate header table for (int index=1;indexAssocRuleMining class. */ public FPtree(AssocRuleMining armInstance) { super(armInstance); numRows = armInstance.numRows; numCols = armInstance.numCols; support = armInstance.support; confidence = armInstance.confidence; dataArray = armInstance.dataArray; minSupport = armInstance.minSupport; numOneItemSets = armInstance.numOneItemSets; conversionArray = armInstance.conversionArray; reconversionArray = armInstance.reconversionArray; // Initialise root node rootNode = new FPtreeNode(); // Create header table headerTable = new FPgrowthHeaderTable[numOneItemSets+1]; // Populate header table for (int index=1;index If reference for current itemset found increments support count and proceed down branch, otherwise adds to current level. @param ref the current location in the FP-tree (rootNode at start). @param place the current index in the given itemset. @param itemSet the given itemset. @param support the associated support value for the given itemset. @param headerRef the link to the appropriate place in the header table. */ private void addToFPtree(FPtreeNode ref, int place, short[] itemSet, int support, FPgrowthHeaderTable[] headerRef) { if (place < itemSet.length) { if (!addToFPtree1(ref,place,itemSet,support,headerRef)) addToFPtree2(ref,place,itemSet,support,headerRef); } } /* ADD TO FP TREE 1 */ /** Searches through existing branch and if itemset found updates the support count and returns true, otherwise return false. @param ref the current FP-tree node reference. @param place the current index in the given itemset. @param itemSet the given itemset. @param support the associated support value for the given itemset. @param headerRef the link to the appropriate place in the header table. @return true if given itemset exists in FP-tree, and false otherwise. */ private boolean addToFPtree1(FPtreeNode ref, int place, short[] itemSet, int support, FPgrowthHeaderTable[] headerRef) { // Loop if (ref.childRefs != null) { for (int index=0;index Adds first attribute in itemSet and then rest of sequence. @param ref the current FP-tree node reference. @param place the current index in the given itemset. @param itemSet the given itemset. @param support the associated support value for the given itemset. @param headerRef the link to the appropriate place in the header table. */ private void addToFPtree2(FPtreeNode ref, int place, short[] itemSet, int support, FPgrowthHeaderTable[] headerRef) { // Create new Item Prefix Subtree Node FPgrowthItemPrefixSubtreeNode newPrefixNode = new FPgrowthItemPrefixSubtreeNode(itemSet[place],support,ref.node); // Create new FP tree node incorporating new Item Prefix Subtree Node FPtreeNode newFPtreeNode = new FPtreeNode(newPrefixNode); // Add link from header table addRefToFPgrowthHeaderTable(itemSet[place],newPrefixNode,headerRef); // Add into FP tree ref.childRefs = reallocFPtreeChildRefs(ref.childRefs,newFPtreeNode); // Proceed down branch with rest of itemSet addRestOfitemSet(ref.childRefs[tempIndex],newPrefixNode,place+1,itemSet, support,headerRef); } /* ADD REST OF ITEMSET */ /** Continues adding attributes in current itemset to FP-tree. @param ref the current FP-tree node reference. @param backRef the backwards link to the previous node. @param place the current index in the given itemset. @param itemSet the given itemset. @param support the associated support value for the given itemset. @param headerRef the link to the appropriate place in the header table. */ private void addRestOfitemSet(FPtreeNode ref, FPgrowthItemPrefixSubtreeNode backRef, int place, short[] itemSet, int support, FPgrowthHeaderTable[] headerRef) { // Process if more items in item set. if (place Commence with the bottom of the header table and work upwards. Working upwards from the bottom of the header table if there is a link to an FP tree node :
  1. Count the support.
  2. Build up itemSet sofar.
  3. Add to supported sets.
  4. Build a new FP tree: (i) create a new local root, (ii) create a new local header table and (iii) populate with ancestors.
  5. If new local FP tree is not empty repeat mining operation.
Otherwise end. @param tableRef the reference to the current location in the header table (commencing with the last item). @param itemSetSofar the label fot the current item sets as generated to date (null at start). */ private void startMining(FPgrowthHeaderTable[] tableRef, short[] itemSetSofar) { int headerTableEnd = tableRef.length-1; FPgrowthColumnCounts[] countArray = null; FPgrowthHeaderTable[] localHeaderTable = null; FPtreeNode localRoot; int support; short[] newCodeSofar; // Loop through header table from end to start, item by item for (int index=headerTableEnd;index>=1;index--) { // Check for null link if (tableRef[index].nodeLink != null) { // process trail of links from header table element startMining(tableRef[index].nodeLink,tableRef[index].itemName, itemSetSofar); } } } /** Commence process of mining FP tree with respect to a single element in the header table. @param nodeLink the firsty link from the header table pointing to an FP-tree node. @param itemName the label associated with the element of interest in the header table. @param itemSetSofar the item set represented by the current FP-tree. */ protected void startMining(FPgrowthItemPrefixSubtreeNode nodeLink, short itemName, short[] itemSetSofar) { // Count support for current item in header table and store a // T-tree data structure int support = genSupHeadTabItem(nodeLink); short[] newCodeSofar = realloc2(itemSetSofar,itemName); addToTtree(newCodeSofar,support); // Collect ancestor itemSets and store in linked list structure startTempSets=null; generateAncestorCodes(nodeLink); // Process Ancestor itemSets if (startTempSets != null) { // Count singles in linked list FPgrowthColumnCounts[] countArray = countFPgrowthSingles(); // Create and populate local header table FPgrowthHeaderTable[] localHeaderTable = createLocalHeaderTable(countArray); if (localHeaderTable != null) { // Prune ancestor itemSets pruneAncestorCodes(countArray); // Create new local root for local FP tree FPtreeNode localRoot = generateLocalFPtree(localHeaderTable); // Mine new FP tree startMining(localHeaderTable,newCodeSofar); } } } /* ---------------------------------------------------------------------- */ /* */ /* PROCESS CURRENT HEADER TABLE */ /* */ /* ---------------------------------------------------------------------- */ /* GENERATE SUPPORT FOR HEADER TABLE SINGLE ITEM: */ /** Counts support for single attributes in header table by following node links. @param nodeLink the start link from the header table. @return the support valye for the item set indicated by the header table. */ private int genSupHeadTabItem(FPgrowthItemPrefixSubtreeNode nodeLink) { int counter = 0; // Loop while(nodeLink != null) { counter = counter+nodeLink.itemCount; numUpdates++; nodeLink = nodeLink.nodeLink; } // Return return(counter); } /* ---------------------------------------------------------------------- */ /* */ /* ANCESTOR CODES */ /* */ /* ---------------------------------------------------------------------- */ /* GENERATE ANCESTOR CODES */ /** Generates ancestor itemSets are made up of the parent nodes of a given node. This method collects such itemSets and stores them in a linked list pointed at by startTempSets. @param ref the reference to the current node in the prefix tree containing itemsets together with support values.*/ private void generateAncestorCodes(FPgrowthItemPrefixSubtreeNode ref) { short[] ancestorCode = null; int support; // Loop while(ref != null) { support = ref.itemCount; ancestorCode = getAncestorCode(ref.parentRef); // Add to linked list with current support if (ancestorCode != null) startTempSets = new FPgrowthSupportedSets(ancestorCode,support, startTempSets); // Next ref ref = ref.nodeLink; } } /* GET ANCESTOR CODE */ /** Generate the ancestor itemSet from a given node. @param ref the reference to the current node in the prefix tree containing itemsets together with support values. */ private short[] getAncestorCode(FPgrowthItemPrefixSubtreeNode ref) { short[] itemSet = null; if (ref == null) return(null); // Else process while (ref != null) { itemSet = realloc2(itemSet,ref.itemName); ref = ref.parentRef; } // Return return(itemSet); } /* PRUNE ANCESTOR CODES */ /** Removes elements in ancestor itemSets (pointed at by startTempSets) which are not supported by referring to count array (which contains all the current supported 1 itemsets). @param countArray the array of FPgrowthColumnCounts structures describing the single item sets (in terms of labels and associated support), contained in a linked list of FPgrowthSupportedSets which in turn describe the ancestor nodes in an FP-tree that preceed the nodes identified by following a trail of links from a particular item in the header table. */ private void pruneAncestorCodes(FPgrowthColumnCounts[] countArray) { FPgrowthSupportedSets ref = startTempSets; // Loop through linked list of ancestor paths while(ref != null) { for(int index=0;indexFPgrowthColumnCounts structures describing the single item sets (in terms of labels and associated support), contained in a linked list of FPgrowthSupportedSets which in turn describe the ancestor nodes in an FP-tree that preceed the nodes identified by following a trail of links from a particular item in the header table. */ private FPgrowthColumnCounts[] countFPgrowthSingles() { int index, place=0; FPgrowthSupportedSets nodeLink = startTempSets; // Start of linked list // Dimension array, assume all attributes present, then it will // be possible to index in to the array. FPgrowthColumnCounts[] countArray = new FPgrowthColumnCounts[numOneItemSets+1]; // Initialise array for (index=1;index= minSupport) counter++; } // Build new Header Table array containing only supported items if (counter == 1) return(null); FPgrowthHeaderTable[] localHeaderTable = new FPgrowthHeaderTable[counter]; // Populate header table int place=1; for (int index=1;index= minSupport) { localHeaderTable[place] = new FPgrowthHeaderTable((short) countArray[index].columnNum); place++; } } // Return return(localHeaderTable); } /* ORDER LOCAL HEADER TABLE */ /** Orders local header table (currently unused). @param localHeaderTable the FPgrpwth header table to be ordered. @param countArray the csupport for the 1 item sets. */ private void orderLocalHeaderTable(FPgrowthHeaderTable[] localHeaderTable, FPgrowthColumnCounts[] countArray) { boolean isOrdered; FPgrowthHeaderTable temp; int index, place1, place2; //loop through table do { index = 1; isOrdered=true; while (index < (localHeaderTable.length-1)) { place1 = localHeaderTable[index].itemName; place2 = localHeaderTable[index+1].itemName; if (countArray[place1].support > countArray[place2].support) { isOrdered=false; // Swap temp = localHeaderTable[index]; localHeaderTable[index] = localHeaderTable[index+1]; localHeaderTable[index+1] = temp; } // increment index index++; } } while (isOrdered==false); } /* ---------------------------------------------------------------------- */ /* */ /* GENERATE NEW FP-TREE */ /* */ /* ---------------------------------------------------------------------- */ /* GENERATE LOCAL FP-tree */ /** Generates a local FP tree @param tableRef reference to start of header table containing links to an FP-tree produced during the FP-tree generation process. @return reference to the start of the generated FP-tree*/ private FPtreeNode generateLocalFPtree(FPgrowthHeaderTable[] tableRef) { FPgrowthSupportedSets ref = startTempSets; FPtreeNode localRoot = new FPtreeNode(); // Loop while(ref != null) { // Add to conditional FP tree if (ref.itemSet != null) addToFPtree(localRoot,0,ref.itemSet, ref.support,tableRef); ref = ref.nodeLink; } // Return return(localRoot); } /* ---------------------------------------------------------- */ /* */ /* FP-TREE UTILITIES */ /* */ /* ---------------------------------------------------------- */ /* REALLOC 1 FP-TREE */ /** Resizes the given array of FP-tree nodes so that its length is increased by one element and new element inserted. @param oldArray the given array of FP-tree nodes. @param newNode the given node to be added to the FP-tree @return The revised array of FP-tree nodes. */ private FPtreeNode[] reallocFPtreeChildRefs(FPtreeNode[] oldArray, FPtreeNode newNode) { // No old array if (oldArray == null) { FPtreeNode[] newArray = {newNode}; tempIndex = 0; return(newArray); } // Otherwise create new array with length one greater than old array int oldArrayLength = oldArray.length; FPtreeNode[] newArray = new FPtreeNode[oldArrayLength+1]; // Insert new node in correct lexicographic order. for (int index1=0;index1 < oldArrayLength;index1++) { if (newNode.node.itemName < oldArray[index1].node.itemName) { newArray[index1] = newNode; for (int index2=index1;index2JTextArea class. */ public void outputItemPrefixSubtree(JTextArea textArea) { int flag; textArea.append("PREFIX SUBTREE FROM HEADER TABLE\n"); textArea.append("Format: Header N, then list (N) itemSet:count\n"); for(int index=1;indexJTextArea class. */ /*private void outputItemPrefixSubtree(JTextArea textArea, FPgrowthHeaderTable[] tableRef) { int flag; textArea.append("PREFIX SUBTREE FROM LOCAL HEADER TABLE\n"); textArea.append("Format: Header N, then list (N) itemSet:count\n"); for(int index=1;indexJTextArea class. @param ref the reference to the given branch. @return a counter representing the current "node number" (used in output). */ private int outputItemPrefixTree(JTextArea textArea, FPgrowthItemPrefixSubtreeNode ref) { int counter = 1; // Loop while (ref != null) { textArea.append("(" + counter + ") " + (reconvertItem(ref.itemName)) + ":" + ref.itemCount + " "); counter++; ref = ref.nodeLink; } return(counter); } /*-------------------*/ /* 2. OUTPUT FP TREE */ /*-------------------*/ /** Commences process of outputting FP-tree to screen. */ public void outputFPtree() { System.out.println("FP TREE"); outputFPtreeNode1(); System.out.println(); } /** Continues process of outputting FP-tree to screen. */ private void outputFPtreeNode1() { outputFPtreeNode2(rootNode.childRefs,""); } /** Outputs a given level in an FP-tree to the screen. @param ref the reference to the given FP-tree level. @param nodeID the root string for the node ID. */ private void outputFPtreeNode2(FPtreeNode ref[],String nodeID) { if (ref == null) return; // Otherwise process for (int index=0;indexJTextArea class. */ public void outputFPtree(JTextArea textArea) { outputFPtreeNode1(textArea); textArea.append("\n"); } /** Continues process of outputting FP-tree to screen (GUI version). @param textArea the given instance of the JTextArea class. */ private void outputFPtreeNode1(JTextArea textArea) { outputFPtreeNode2(textArea,rootNode.childRefs,""); } /** Outputs a given level in an FP-tree to the screen (GUI version). @param textArea the given instance of the JTextArea class. @param ref the reference to the given FP-tree level. @param nodeID the root string for the node ID. */ private void outputFPtreeNode2(JTextArea textArea,FPtreeNode ref[], String nodeID) { if (ref == null) return; // Otherwise process for (int index=0;indexJTextArea class. @param ref the reference to the given node. */ public void outputItemPrefixSubtreeNode(JTextArea textArea, FPgrowthItemPrefixSubtreeNode ref) { textArea.append((reconvertItem(ref.itemName)) + ":" + ref.itemCount); if (ref.nodeLink != null) { textArea.append(" (ref to " + (reconvertItem(ref.nodeLink.itemName)) + ":" + ref.nodeLink.itemCount + ")\n"); } else textArea.append(" (ref to null)\n"); } /** Commences process of outputting a given branch of an FP-tree to the screen (used for diagnostic purposes during development only). @param ref the reference to the given FP-tree branch. */ private void outputFPtreeNode(FPtreeNode ref) { System.out.println("LOCAL FP TREE"); outputFPtreeNode2(ref.childRefs,""); System.out.println(); } /*------------------------------*/ /* 3. OUTPUT FP-TREE STATISTICS */ /*------------------------------*/ /* OUTPUT FP TREE STORAGE: */ /** Commence process of determining and outputting FP-tree storage, number of updates and number of nodes. */ public void outputFPtreeStorage() { int storage=8; // 8 Bytes for root node numberOfNodes = 1; // For root node storage = calculateStorage(rootNode.childRefs,storage); // Add header table. storage = storage + (headerTable.length*6); System.out.println("FP tree storage = " + storage + " (bytes)"); System.out.println("FP tree updates = " + numUpdates); System.out.println("FP tree nodes = " + numberOfNodes); } /** Commence process of determining and outputting FP-tree storage, number of updates and number of nodes (GUI version). @param textArea the given instance of the JTextArea class. */ public void outputFPtreeStats(JTextArea textArea) { int storage = 8; // 8 Bytes for root node numberOfNodes = 1; // For root node storage = calculateStorage(rootNode.childRefs,storage); // Add header table. storage = storage + (headerTable.length*6); textArea.append("FP tree storage = " + storage + " (bytes)\n"); textArea.append("FP tree updates = " + numUpdates + "\n"); textArea.append("FP tree nodes = " + numberOfNodes + "\n"); } /* CALCULATE STORAGE */ /** Determines storage requirements for FP-tree. @param ref the reference to the current portion of the P-tree under consideration. @param storage the storage requirements so far. @return the storage in Bytes required for the given FP=tree node.*/ private int calculateStorage(FPtreeNode[] ref, int storage) { if (ref == null) return(storage); // Process, each node has 14+8 bytes of storage, 8 for FP tree // links (child and sibling), 14 for prefix tree links (parentRef, // nodeRef, Support, ID for (int index=0;indexFPgrowthColumnCounts structures describing the single item sets (in terms of labels and associated support), contained in a linked list of FPgrowthSupportedSets which in turn describe the ancestor nodes in an FP-tree that preceed the nodes identified by following a trail of links from a particular item in the header table. */ private void outputColumnCount(FPgrowthColumnCounts[] countArray) { for(int index=1;index