/* -------------------------------------------------------------------------- */
/* */
/* FP T R E E C L A S S */
/* */
/* Frans Coenen */
/* */
/* 25 May 2001 */
/* (Revised 5 February 2003, 3 February 2006) */
/* */
/* Department of Computer Science */
/* The University of Liverpool */
/* */
/* -------------------------------------------------------------------------- */
/* Structure:
AssocRuleMining
|
+-- TotalSupportTree
|
+-- FPtree */
//package lucsKDD_ARM;
import java.io.*;
import java.util.*;
// Java GUI packages
import javax.swing.*;
/** Implementation of Han's FP-growth ARM algorithm.
@author Frans Coenen
@version 5 February 2003 */
public class FPtree extends TotalSupportTree {
/* ------ FIELDS ------ */
/** FP-tree node structure comprising a FPgrowthItemPrefixSubtreeNode in
which to store counts and a reference to a child branch. */
protected class FPtreeNode {
/** The FP tree node. */
private FPgrowthItemPrefixSubtreeNode node = null;
/** The reference to the child branch (levels in FP-tree branches are
stored as a arrays of FPtreeNode structures. */
private FPtreeNode[] childRefs = null;
/** Default constructor. */
protected FPtreeNode() {
}
/** Single argument constructor.
@param newNode the reference to a new node to be included in the
FP-tree.*/
protected FPtreeNode(FPgrowthItemPrefixSubtreeNode newNode) {
node = newNode;
}
}
/** Prefix subtree structure.
A set enumeration tree in which to store
itemsets together with support values. */
private class FPgrowthItemPrefixSubtreeNode {
/** The attribute identifier. */
private short itemName;
/** The support count. */
private int itemCount;
/** The backward link to the parent node in FP tree. */
private FPgrowthItemPrefixSubtreeNode parentRef = null;
/** The forward link to the next node in a linked list of nodes with
same attribute identifier starting with an element in the header table
(array). */
private FPgrowthItemPrefixSubtreeNode nodeLink = null;
/** Default constructor. */
private FPgrowthItemPrefixSubtreeNode() {
}
/** Three argument constructor.
@param name the itemset identifier.
@param support the support value for the itemset.
@param backRef the backward link to the parent node. */
private FPgrowthItemPrefixSubtreeNode(short name, int support,
FPgrowthItemPrefixSubtreeNode backRef) {
itemName = name;
itemCount = support;
parentRef = backRef;
}
}
/** Header table.
Array of these structures used to link into FP-tree.
All FP-tree nodes with the same identifier are linked together starting
from a node in a header table (made up of HeaderTasble structures).
It is this "cross" linking that gives the FP-tree its most significant
advantage. */
protected class FPgrowthHeaderTable {
/** The 1-itemset (attribute) identifier. */
protected short itemName;
/** The forward link to the next node in the link list of nodes. */
protected FPgrowthItemPrefixSubtreeNode nodeLink = null;
// Constructors
protected FPgrowthHeaderTable (short columnNum) {
itemName = columnNum;
}
}
/** Structure in which to store ancestor itemSets, thus nodes in an FP-tree
that preceed the nodes identified by following a trail of links from a
particular item in the header table. */
private class FPgrowthSupportedSets {
/** The itemSet label. */
private short[] itemSet = null;
/** The associated support value for the given itemset. */
private int support;
/** The reference to the next node in a linked list. */
private FPgrowthSupportedSets nodeLink = null;
/** Three argument constructor.
@param newitemSet the given itemSet label.
@param newSupport the associated support value for the given itemset.
@param newNodeLink the reference to the next node in a linked list. */
private FPgrowthSupportedSets(short[] newitemSet, int newSupport,
FPgrowthSupportedSets newNodeLink) {
itemSet = newitemSet;
support = newSupport;
nodeLink = newNodeLink;
}
}
/** Structure in which to store counts. */
private class FPgrowthColumnCounts {
/** The column/attribute ID number. */
private short columnNum;
/** The associated support value. */
private int support=0;
/** One argument constructor.
@param column the column/attribute ID number. */
private FPgrowthColumnCounts(int column) {
columnNum = (short) column;
}
/** Two argument constructor.
@param column the column/attribute ID number.
@param sup the associatec support value. */
private FPgrowthColumnCounts(int column, int sup) {
columnNum = (short) column;
support = sup;
}
}
// Data structures
/** Start reference for FP-tree. */
protected FPtreeNode rootNode = null;
/** Start reference for header table. */
protected FPgrowthHeaderTable[] headerTable;
/** Start reference for supportedSets linked list (temporary storage
only).*/
private static FPgrowthSupportedSets startTempSets = null;
// Other fields
/** Temporary storage for an index into an array of FP-tree nodes.
Used when reassigning child reference arrays. */
private int tempIndex = 0;
/** Number of nodes created. */
private int numberOfNodes;
/* ------ CONSTRUCTORS ------ */
/** Constructor to process command line argument.
@param args the command line arguments. */
public FPtree(String[] args) {
super(args);
// Initialise root node
rootNode = new FPtreeNode();
// Create header table
headerTable = new FPgrowthHeaderTable[numOneItemSets+1];
// Populate header table
for (int index=1;indexAssocRuleMining
class. */
public FPtree(AssocRuleMining armInstance) {
super(armInstance);
numRows = armInstance.numRows;
numCols = armInstance.numCols;
support = armInstance.support;
confidence = armInstance.confidence;
dataArray = armInstance.dataArray;
minSupport = armInstance.minSupport;
numOneItemSets = armInstance.numOneItemSets;
conversionArray = armInstance.conversionArray;
reconversionArray = armInstance.reconversionArray;
// Initialise root node
rootNode = new FPtreeNode();
// Create header table
headerTable = new FPgrowthHeaderTable[numOneItemSets+1];
// Populate header table
for (int index=1;index If reference for current itemset found increments support count and
proceed down branch, otherwise adds to current level.
@param ref the current location in the FP-tree (rootNode at start).
@param place the current index in the given itemset.
@param itemSet the given itemset.
@param support the associated support value for the given itemset.
@param headerRef the link to the appropriate place in the header table. */
private void addToFPtree(FPtreeNode ref, int place, short[] itemSet,
int support, FPgrowthHeaderTable[] headerRef) {
if (place < itemSet.length) {
if (!addToFPtree1(ref,place,itemSet,support,headerRef))
addToFPtree2(ref,place,itemSet,support,headerRef);
}
}
/* ADD TO FP TREE 1 */
/** Searches through existing branch and if itemset found updates the
support count and returns true, otherwise return false.
@param ref the current FP-tree node reference.
@param place the current index in the given itemset.
@param itemSet the given itemset.
@param support the associated support value for the given itemset.
@param headerRef the link to the appropriate place in the header table.
@return true if given itemset exists in FP-tree, and false otherwise. */
private boolean addToFPtree1(FPtreeNode ref, int place, short[] itemSet,
int support, FPgrowthHeaderTable[] headerRef) {
// Loop
if (ref.childRefs != null) {
for (int index=0;index Adds first attribute in itemSet and then
rest of sequence.
@param ref the current FP-tree node reference.
@param place the current index in the given itemset.
@param itemSet the given itemset.
@param support the associated support value for the given itemset.
@param headerRef the link to the appropriate place in the header table. */
private void addToFPtree2(FPtreeNode ref, int place, short[] itemSet,
int support, FPgrowthHeaderTable[] headerRef) {
// Create new Item Prefix Subtree Node
FPgrowthItemPrefixSubtreeNode newPrefixNode = new
FPgrowthItemPrefixSubtreeNode(itemSet[place],support,ref.node);
// Create new FP tree node incorporating new Item Prefix Subtree Node
FPtreeNode newFPtreeNode = new FPtreeNode(newPrefixNode);
// Add link from header table
addRefToFPgrowthHeaderTable(itemSet[place],newPrefixNode,headerRef);
// Add into FP tree
ref.childRefs = reallocFPtreeChildRefs(ref.childRefs,newFPtreeNode);
// Proceed down branch with rest of itemSet
addRestOfitemSet(ref.childRefs[tempIndex],newPrefixNode,place+1,itemSet,
support,headerRef);
}
/* ADD REST OF ITEMSET */
/** Continues adding attributes in current itemset to FP-tree.
@param ref the current FP-tree node reference.
@param backRef the backwards link to the previous node.
@param place the current index in the given itemset.
@param itemSet the given itemset.
@param support the associated support value for the given itemset.
@param headerRef the link to the appropriate place in the header table. */
private void addRestOfitemSet(FPtreeNode ref, FPgrowthItemPrefixSubtreeNode backRef,
int place, short[] itemSet, int support,
FPgrowthHeaderTable[] headerRef) {
// Process if more items in item set.
if (place Commence with the bottom
of the header table and work upwards. Working upwards from the bottom of
the header table if there is a link to an FP tree node :
- Count the support.
- Build up itemSet sofar.
- Add to supported sets.
- Build a new FP tree: (i) create a new local root, (ii) create a
new local header table and (iii) populate with ancestors.
- If new local FP tree is not empty repeat mining operation.
Otherwise end.
@param tableRef the reference to the current location in the header table
(commencing with the last item).
@param itemSetSofar the label fot the current item sets as generated to
date (null at start). */
private void startMining(FPgrowthHeaderTable[] tableRef,
short[] itemSetSofar) {
int headerTableEnd = tableRef.length-1;
FPgrowthColumnCounts[] countArray = null;
FPgrowthHeaderTable[] localHeaderTable = null;
FPtreeNode localRoot;
int support;
short[] newCodeSofar;
// Loop through header table from end to start, item by item
for (int index=headerTableEnd;index>=1;index--) {
// Check for null link
if (tableRef[index].nodeLink != null) {
// process trail of links from header table element
startMining(tableRef[index].nodeLink,tableRef[index].itemName,
itemSetSofar);
}
}
}
/** Commence process of mining FP tree with respect to a single element in
the header table.
@param nodeLink the firsty link from the header table pointing to an FP-tree
node.
@param itemName the label associated with the element of interest in the
header table.
@param itemSetSofar the item set represented by the current FP-tree. */
protected void startMining(FPgrowthItemPrefixSubtreeNode nodeLink,
short itemName, short[] itemSetSofar) {
// Count support for current item in header table and store a
// T-tree data structure
int support = genSupHeadTabItem(nodeLink);
short[] newCodeSofar = realloc2(itemSetSofar,itemName);
addToTtree(newCodeSofar,support);
// Collect ancestor itemSets and store in linked list structure
startTempSets=null;
generateAncestorCodes(nodeLink);
// Process Ancestor itemSets
if (startTempSets != null) {
// Count singles in linked list
FPgrowthColumnCounts[] countArray = countFPgrowthSingles();
// Create and populate local header table
FPgrowthHeaderTable[] localHeaderTable =
createLocalHeaderTable(countArray);
if (localHeaderTable != null) {
// Prune ancestor itemSets
pruneAncestorCodes(countArray);
// Create new local root for local FP tree
FPtreeNode localRoot = generateLocalFPtree(localHeaderTable);
// Mine new FP tree
startMining(localHeaderTable,newCodeSofar);
}
}
}
/* ---------------------------------------------------------------------- */
/* */
/* PROCESS CURRENT HEADER TABLE */
/* */
/* ---------------------------------------------------------------------- */
/* GENERATE SUPPORT FOR HEADER TABLE SINGLE ITEM: */
/** Counts support for single attributes in header table by following node
links.
@param nodeLink the start link from the header table.
@return the support valye for the item set indicated by the header table. */
private int genSupHeadTabItem(FPgrowthItemPrefixSubtreeNode nodeLink) {
int counter = 0;
// Loop
while(nodeLink != null) {
counter = counter+nodeLink.itemCount;
numUpdates++;
nodeLink = nodeLink.nodeLink;
}
// Return
return(counter);
}
/* ---------------------------------------------------------------------- */
/* */
/* ANCESTOR CODES */
/* */
/* ---------------------------------------------------------------------- */
/* GENERATE ANCESTOR CODES */
/** Generates ancestor itemSets are made up of the parent nodes of a given
node. This method collects such itemSets and stores them in a linked list
pointed at by startTempSets.
@param ref the reference to the current node in the prefix tree containing
itemsets together with support values.*/
private void generateAncestorCodes(FPgrowthItemPrefixSubtreeNode ref) {
short[] ancestorCode = null;
int support;
// Loop
while(ref != null) {
support = ref.itemCount;
ancestorCode = getAncestorCode(ref.parentRef);
// Add to linked list with current support
if (ancestorCode != null) startTempSets =
new FPgrowthSupportedSets(ancestorCode,support,
startTempSets);
// Next ref
ref = ref.nodeLink;
}
}
/* GET ANCESTOR CODE */
/** Generate the ancestor itemSet from a given node.
@param ref the reference to the current node in the prefix tree containing
itemsets together with support values. */
private short[] getAncestorCode(FPgrowthItemPrefixSubtreeNode ref) {
short[] itemSet = null;
if (ref == null) return(null);
// Else process
while (ref != null) {
itemSet = realloc2(itemSet,ref.itemName);
ref = ref.parentRef;
}
// Return
return(itemSet);
}
/* PRUNE ANCESTOR CODES */
/** Removes elements in ancestor itemSets (pointed at by
startTempSets) which are not supported by referring to count
array (which contains all the current supported 1 itemsets).
@param countArray the array of FPgrowthColumnCounts structures
describing the single item sets (in terms of labels and associated
support), contained in a linked list of FPgrowthSupportedSets
which in turn describe the ancestor nodes in an FP-tree that preceed the
nodes identified by following a trail of links from a particular item in
the header table. */
private void pruneAncestorCodes(FPgrowthColumnCounts[] countArray) {
FPgrowthSupportedSets ref = startTempSets;
// Loop through linked list of ancestor paths
while(ref != null) {
for(int index=0;indexFPgrowthColumnCounts structures describing the
single item sets (in terms of labels and associated support), contained in
a linked list of FPgrowthSupportedSets which in turn describe the
ancestor nodes in an FP-tree that preceed the nodes identified by following
a trail of links from a particular item in the header table. */
private FPgrowthColumnCounts[] countFPgrowthSingles() {
int index, place=0;
FPgrowthSupportedSets nodeLink = startTempSets; // Start of linked list
// Dimension array, assume all attributes present, then it will
// be possible to index in to the array.
FPgrowthColumnCounts[] countArray = new
FPgrowthColumnCounts[numOneItemSets+1];
// Initialise array
for (index=1;index= minSupport) counter++;
}
// Build new Header Table array containing only supported items
if (counter == 1) return(null);
FPgrowthHeaderTable[] localHeaderTable =
new FPgrowthHeaderTable[counter];
// Populate header table
int place=1;
for (int index=1;index= minSupport) {
localHeaderTable[place] = new
FPgrowthHeaderTable((short) countArray[index].columnNum);
place++;
}
}
// Return
return(localHeaderTable);
}
/* ORDER LOCAL HEADER TABLE */
/** Orders local header table (currently unused).
@param localHeaderTable the FPgrpwth header table to be ordered.
@param countArray the csupport for the 1 item sets. */
private void orderLocalHeaderTable(FPgrowthHeaderTable[] localHeaderTable,
FPgrowthColumnCounts[] countArray) {
boolean isOrdered;
FPgrowthHeaderTable temp;
int index, place1, place2;
//loop through table
do {
index = 1;
isOrdered=true;
while (index < (localHeaderTable.length-1)) {
place1 = localHeaderTable[index].itemName;
place2 = localHeaderTable[index+1].itemName;
if (countArray[place1].support > countArray[place2].support) {
isOrdered=false;
// Swap
temp = localHeaderTable[index];
localHeaderTable[index] = localHeaderTable[index+1];
localHeaderTable[index+1] = temp;
}
// increment index
index++;
}
} while (isOrdered==false);
}
/* ---------------------------------------------------------------------- */
/* */
/* GENERATE NEW FP-TREE */
/* */
/* ---------------------------------------------------------------------- */
/* GENERATE LOCAL FP-tree */
/** Generates a local FP tree
@param tableRef reference to start of header table containing links to
an FP-tree produced during the FP-tree generation process.
@return reference to the start of the generated FP-tree*/
private FPtreeNode generateLocalFPtree(FPgrowthHeaderTable[] tableRef) {
FPgrowthSupportedSets ref = startTempSets;
FPtreeNode localRoot = new FPtreeNode();
// Loop
while(ref != null) {
// Add to conditional FP tree
if (ref.itemSet != null) addToFPtree(localRoot,0,ref.itemSet,
ref.support,tableRef);
ref = ref.nodeLink;
}
// Return
return(localRoot);
}
/* ---------------------------------------------------------- */
/* */
/* FP-TREE UTILITIES */
/* */
/* ---------------------------------------------------------- */
/* REALLOC 1 FP-TREE */
/** Resizes the given array of FP-tree nodes so that its length is
increased by one element and new element inserted.
@param oldArray the given array of FP-tree nodes.
@param newNode the given node to be added to the FP-tree
@return The revised array of FP-tree nodes. */
private FPtreeNode[] reallocFPtreeChildRefs(FPtreeNode[] oldArray,
FPtreeNode newNode) {
// No old array
if (oldArray == null) {
FPtreeNode[] newArray = {newNode};
tempIndex = 0;
return(newArray);
}
// Otherwise create new array with length one greater than old array
int oldArrayLength = oldArray.length;
FPtreeNode[] newArray = new FPtreeNode[oldArrayLength+1];
// Insert new node in correct lexicographic order.
for (int index1=0;index1 < oldArrayLength;index1++) {
if (newNode.node.itemName < oldArray[index1].node.itemName) {
newArray[index1] = newNode;
for (int index2=index1;index2JTextArea class. */
public void outputItemPrefixSubtree(JTextArea textArea) {
int flag;
textArea.append("PREFIX SUBTREE FROM HEADER TABLE\n");
textArea.append("Format: Header N, then list (N) itemSet:count\n");
for(int index=1;indexJTextArea class. */
/*private void outputItemPrefixSubtree(JTextArea textArea,
FPgrowthHeaderTable[] tableRef) {
int flag;
textArea.append("PREFIX SUBTREE FROM LOCAL HEADER TABLE\n");
textArea.append("Format: Header N, then list (N) itemSet:count\n");
for(int index=1;indexJTextArea class.
@param ref the reference to the given branch.
@return a counter representing the current "node number" (used in
output). */
private int outputItemPrefixTree(JTextArea textArea,
FPgrowthItemPrefixSubtreeNode ref) {
int counter = 1;
// Loop
while (ref != null) {
textArea.append("(" + counter + ") " +
(reconvertItem(ref.itemName)) + ":" + ref.itemCount + " ");
counter++;
ref = ref.nodeLink;
}
return(counter);
}
/*-------------------*/
/* 2. OUTPUT FP TREE */
/*-------------------*/
/** Commences process of outputting FP-tree to screen. */
public void outputFPtree() {
System.out.println("FP TREE");
outputFPtreeNode1();
System.out.println();
}
/** Continues process of outputting FP-tree to screen. */
private void outputFPtreeNode1() {
outputFPtreeNode2(rootNode.childRefs,"");
}
/** Outputs a given level in an FP-tree to the screen.
@param ref the reference to the given FP-tree level.
@param nodeID the root string for the node ID. */
private void outputFPtreeNode2(FPtreeNode ref[],String nodeID) {
if (ref == null) return;
// Otherwise process
for (int index=0;indexJTextArea class. */
public void outputFPtree(JTextArea textArea) {
outputFPtreeNode1(textArea);
textArea.append("\n");
}
/** Continues process of outputting FP-tree to screen (GUI version).
@param textArea the given instance of the JTextArea class. */
private void outputFPtreeNode1(JTextArea textArea) {
outputFPtreeNode2(textArea,rootNode.childRefs,"");
}
/** Outputs a given level in an FP-tree to the screen (GUI version).
@param textArea the given instance of the JTextArea class.
@param ref the reference to the given FP-tree level.
@param nodeID the root string for the node ID. */
private void outputFPtreeNode2(JTextArea textArea,FPtreeNode ref[],
String nodeID) {
if (ref == null) return;
// Otherwise process
for (int index=0;indexJTextArea class.
@param ref the reference to the given node. */
public void outputItemPrefixSubtreeNode(JTextArea textArea,
FPgrowthItemPrefixSubtreeNode ref) {
textArea.append((reconvertItem(ref.itemName)) + ":" + ref.itemCount);
if (ref.nodeLink != null) {
textArea.append(" (ref to " +
(reconvertItem(ref.nodeLink.itemName)) + ":" +
ref.nodeLink.itemCount + ")\n");
}
else textArea.append(" (ref to null)\n");
}
/** Commences process of outputting a given branch of an FP-tree to the
screen (used for diagnostic purposes during development only).
@param ref the reference to the given FP-tree branch. */
private void outputFPtreeNode(FPtreeNode ref) {
System.out.println("LOCAL FP TREE");
outputFPtreeNode2(ref.childRefs,"");
System.out.println();
}
/*------------------------------*/
/* 3. OUTPUT FP-TREE STATISTICS */
/*------------------------------*/
/* OUTPUT FP TREE STORAGE: */
/** Commence process of determining and outputting FP-tree storage, number
of updates and number of nodes. */
public void outputFPtreeStorage() {
int storage=8; // 8 Bytes for root node
numberOfNodes = 1; // For root node
storage = calculateStorage(rootNode.childRefs,storage);
// Add header table.
storage = storage + (headerTable.length*6);
System.out.println("FP tree storage = " + storage + " (bytes)");
System.out.println("FP tree updates = " + numUpdates);
System.out.println("FP tree nodes = " + numberOfNodes);
}
/** Commence process of determining and outputting FP-tree storage, number
of updates and number of nodes (GUI version).
@param textArea the given instance of the JTextArea class. */
public void outputFPtreeStats(JTextArea textArea) {
int storage = 8; // 8 Bytes for root node
numberOfNodes = 1; // For root node
storage = calculateStorage(rootNode.childRefs,storage);
// Add header table.
storage = storage + (headerTable.length*6);
textArea.append("FP tree storage = " + storage + " (bytes)\n");
textArea.append("FP tree updates = " + numUpdates + "\n");
textArea.append("FP tree nodes = " + numberOfNodes + "\n");
}
/* CALCULATE STORAGE */
/** Determines storage requirements for FP-tree.
@param ref the reference to the current portion of the P-tree under
consideration.
@param storage the storage requirements so far.
@return the storage in Bytes required for the given FP=tree node.*/
private int calculateStorage(FPtreeNode[] ref, int storage) {
if (ref == null) return(storage);
// Process, each node has 14+8 bytes of storage, 8 for FP tree
// links (child and sibling), 14 for prefix tree links (parentRef,
// nodeRef, Support, ID
for (int index=0;indexFPgrowthColumnCounts structures
describing the single item sets (in terms of labels and associated
support), contained in a linked list of FPgrowthSupportedSets
which in turn describe the ancestor nodes in an FP-tree that preceed the
nodes identified by following a trail of links from a particular item in
the header table. */
private void outputColumnCount(FPgrowthColumnCounts[] countArray) {
for(int index=1;index