/* -------------------------------------------------------------------------- */ /* */ /* P A R T I A L S U P P O R T T R E E */ /* */ /* Frans Coenen */ /* */ /* Wednesday 9 January 2003 */ /* (Revised 5/7/2003) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Structure: AssocRuleMining | +-- TotalSupportTree | +-- PartialSupportTree */ /* Java packages */ import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; /** Methods to implement the "Apriori-TFP" (Total From Partial) ARM algorithm using both the T-tree (Total support tree) and P-tree (Partial support tree data structures. @author Frans Coenen @version 5 July 2003 */ public class PartialSupportTree extends TotalSupportTree { /*------------------------------------------------------------------------*/ /* */ /* FIELDS */ /* */ /*------------------------------------------------------------------------*/ /* ------ NESTED CLASSES ------ */ /** Structurte to contain P-tree data in tabular form for improved computational efficiency when creating T-tree.
A 2-D array of these structures is created in which to store the Ptree. */ protected class PtreeRecord { /** Label for P-tree node. */ private short[] pTreeNodeLabel = null; /** Uunion of a pTree node label (pTreeNodeLabel) and all its ancestor node labels. */ private short[] pTreeItemSet = null; /** Partial support count. */ private int support = 0; /** Creaste P-tree record for inclusion in table. @param nodeLabel the label for P-tree node. @param itemSet the uunion of a pTree node label (nodeLabel) and all its ancestor node labels. @param sup the partial support count. */ private PtreeRecord(short[] nodeLabel, short[] itemSet, int sup) { pTreeNodeLabel = nodeLabel; pTreeItemSet = itemSet; support = sup; } } /* Array data structures for P tree */ /** Null array describing an "emty" itemset. */ private short[] zeroitemSet = null; /** Reference variable pointing to start of P-tree. */ private PtreeNodeTop[] startPtreeRef = null; /* ------ P-TREE DATA TABLE ----- */ /** Array of arrays data structures for P-tree table (used as a computational efficiency measure). */ protected PtreeRecord[][] startPtreeTable = null; /** Array for holding the number of P-tree nodes for each possible cardinality given the number of attributes, maximum is equal to number of columns. */ private int[] pTreeNodesOfCardinalityN = null; /** Array of "markers" used during the generation of the P-tree, and contains "current index" values for each level of cardinality. */ private int[] pTreeTableMarker = null; /* Other fields */ /** Number of node updates (used for diagnostic purposes). */ private int numberOfNodeUpdates = 0; /*---------------------------------------------------------------------*/ /* */ /* CONSTRUCTORS */ /* */ /*---------------------------------------------------------------------*/ /** Processes command line arguments.
The numOneItemSets is
incremented by 1 so that indexes match the column numbers */
public PartialSupportTree(String[] args) {
super(args);
}
/*-------------------------------------------------------------------*/
/* */
/* TREE BUILDING METHODS */
/* */
/*-------------------------------------------------------------------*/
/* CREATE P-TREE */
/** Processes data set causing each row to be added to P-Tree. */
public void createPtree() {
System.out.println("GENERATING P-TREE\n------------------");
// Dimension top line of P-tree
startPtreeRef = new PtreeNodeTop[numOneItemSets+1];
// Dimension P-tree table
startPtreeTable = new PtreeRecord[numOneItemSets+1][];
pTreeNodesOfCardinalityN = new int[numOneItemSets+1];
pTreeTableMarker = new int[numOneItemSets+1];
// Initilalise top level of Ptree with nulls
for(int index=0;index Operates as follows:
If found leaf node create new node and stop. Otherwise:
Codes generated by call the checkitemSet function. Arguments as follows:
@param flag the type of branch currently under consideration:
code 0 = root, 1 = child, 2 = sibling.
@param parentLength the number of elements represented by the parent node
of the current node. Used only when adding new "dummy" nodes to maintain
count of number of nodes of a given size for when the Ptree table is
generated.
@param itemSetLength the number of elements in the current itemSet. Used
only when adding new nodes to maintain count of number of nodes of a given
size for when the Ptree table is generated.
@param linkRef the reference (pointer) to current location in the P-tree.
@param itemSet the row itemSet in the input file currently under
consideration.
@param topIndex the index of the element in the array marking the top level
of the P-tree, used only when inserting new nodes hanging from this top
level otherwise ignored.
@param oldRef the reference (pointer) to the previous location in the P-tree,
used when inserting new nodes. */
public void addToPtree(int flag, int parentLength, int itemSetLength,
PtreeNode linkRef, short[] itemSet, int topIndex,
PtreeNode oldRef) {
// No child node hanging of previous level array therefore add new
// node here.
if (linkRef == null) {
PtreeNode newRef = createPtreeNode(itemSet,itemSetLength);
addSupport2(flag,newRef,topIndex,oldRef);
}
// Otherwise process tree
else {
switch (checkItemSets(itemSet,linkRef.itemSet)) {
case 1: /* Rule 1: Same */
numberOfNodeUpdates++;
linkRef.support++;
break;
case 2: /* Rule 2: Before and subset (parent) */
beforeAndSubset(flag,itemSetLength,linkRef,itemSet,topIndex,
oldRef);
break;
case 3: /* Rule 3: Before and not subset (elder sibling) */
beforeAndNotSubset(flag,parentLength,itemSetLength,linkRef,
itemSet,topIndex,oldRef);
break;
case 4: /* Rule 4: After and superset (child) */
afterAndSuperset(parentLength,itemSetLength,linkRef,
itemSet);
break;
case 5: /* Rule 5: After and not superset (younger sibling) */
afterAndNotSuperset(flag,parentLength,itemSetLength,linkRef,
itemSet,topIndex,oldRef);
break;
default: /* Default: Error */
}
}
}
/* BEFORE AND SUBSET */
/** Adds new node into the P-tree on a parent/child link so that the new
node is the parent of the existing child branch and the child of the
previous "parent".
Possibilities:
First checks for leading substring with existing node;
if found and leading substring is not same as current parent creates a new
P-tree node for this substring and then adds in the new node. Possibilities:
If no more child nodes add new node to "current" node as child,
else carry on down the tree with flag set to 1 to indicate we are following
a child branch. Possibilities:
Possible actions:
Also tests for leading
substring. If found and this is not equal to an existing parent itemSet
method causes a dummy node to represent the substring to be inserted (call
to addSupport2). Possibilities:
Possible actions:
The flag argument indicates which type of branch is currently under
consideration: 0 = root, 1 = child, 2 = sibling.
@param flag the type of branch currently under consideration:
code 0 = root, 1 = child, 2 = sibling.
@param newRef the reference (pointer) to newly created parent indicating
current laction in the P-tree.
@param topIndex the index of the element in the array marking the top level
of the P-tree, used only when inserting new nodes hanging from this top
level otherwise ignored.
@param oldRef the reference (pointer) to the previous location in the P-tree,
used when inserting new nodes. */
private void addSupport2(int flag, PtreeNode newRef, int topIndex,
PtreeNode oldRef) {
// Add node
switch (flag) {
case 0:
startPtreeRef[topIndex].childRef = newRef;
break;
case 1:
oldRef.childRef = newRef;
break;
case 2:
oldRef.siblingRef = newRef;
break;
default:
System.out.println("ERROR: Unidentified flag in addSupport\n");
}
}
/* CREAT P-TREE NODE */
/** Creates a P-tree node (other than a top level node).
@param newItemSet the itemset to be stored at the node.
@param level the cardinality (length) of the item set to be stored in
the node. */
private PtreeNode createPtreeNode(short[] itemSet, int level) {
pTreeNodesOfCardinalityN[level]++;
return(new PtreeNode(itemSet));
}
/* GET START OF P-TREE */
/** Gets reference to start of P-tree. */
public PtreeNodeTop[] getStartOfPtree() {
return(startPtreeRef);
}
/* GET NUMBER P-TREE NODES */
/** Gets number of nodes in P-tree. */
public int getNumPtreeNodes() {
return(calculateNumNodes(startPtreeRef));
}
/*----------------------------------------------------------------------- */
/* */
/* P-TREE TABLE */
/* */
/*----------------------------------------------------------------------- */
/* CREATE P-TREE TABLE */
/** Creates P-tree table starting with top level in P-tree. Proceed as
follows.
@param flag the type of branch currently under consideration:
code 0 = root, 1 = child, 2 = sibling.
@param itemSetLength the number of elements in the current itemSet. Used
only when adding new nodes to maintain count of number of nodes of a given
size for when the Ptree table is generated.
@param linkRef the reference (pointer) to current location in the P-tree.
@param currentItemSet the row itemSet in the input file currently under
consideration.
@param topIndex the index of the element in the array marking the top level
of the P-tree, used only when inserting new nodes hanging from this top
level otherwise ignored.
@param oldRef the reference (pointer) to the previous location in the P-tree,
used when inserting new nodes.*/
private void beforeAndSubset(int flag, int itemSetLength, PtreeNode linkRef,
short[] currentitemSet, int topIndex, PtreeNode oldRef) {
// Create new node with support of current node added in;
PtreeNode newRef = createPtreeNode(currentitemSet,itemSetLength);
newRef.support = newRef.support+linkRef.support;
numberOfNodeUpdates++;
// Link in existing branch
newRef.childRef = linkRef;
// Connect new node into tree structure and adjust existing current
// node itemSet so that it does not include the new parent node itemSet
addSupport2(flag,newRef,topIndex,oldRef);
linkRef.itemSet = realloc4(linkRef.itemSet,currentitemSet);
// Check whether any siblings of the existing node need to be
// "moved up" to become a sibling of the new node
newRef.siblingRef=linkRef.siblingRef;
linkRef.siblingRef=null;
}
/* BEFORE AND NOT SUBSET */
/** Insets node into P-tree where new itemset is an elder sibling of the
"current" node.
@param flag the type of branch currently under consideration:
code 0 = root, 1 = child, 2 = sibling.
@param parentLength the number of elements represented by the parent node
of the current node. Used only when adding new "dummy" nodes to maintain
count of number of nodes of a given size for when the Ptree table is
generated.
@param itemSetLength the number of elements in the current itemSet. Used
only when adding new nodes to maintain count of number of nodes of a given
size for when the Ptree table is generated.
@param linkRef the reference (pointer) to current location in the P-tree.
@param currentItemSet the row itemSet in the input file currently under
consideration.
@param topIndex the index of the element in the array marking the top level
of the P-tree, used only when inserting new nodes hanging from this top
level otherwise ignored.
@param oldRef the reference (pointer) to the previous location in the P-tree,
used when inserting new nodes. */
private void beforeAndNotSubset(int flag, int parentLength,
int itemSetLength, PtreeNode linkRef, short[] currentItemSet,
int topIndex, PtreeNode oldRef) {
// Find leading common ellements in row and sibling itemSets if any
short[] leadingSubString = checkForLeadingSubString(currentItemSet,
linkRef.itemSet);
// If leading common ellements exists create new node representing
// common elements and add current itemSet as child of this new node.
// Otherwise add new itemSet as elder sibling.
if (leadingSubString != null) { // Leading substring exists
// Create new dummy parent representing subset
PtreeNode newDummyParentRef = createPtreeNode(leadingSubString,
leadingSubString.length+parentLength-1);
// Add support made up of existing support + 1 for current row
newDummyParentRef.support = linkRef.support+1;
// Insert new parent node into tree
addSupport2(flag,newDummyParentRef,topIndex,oldRef);
// Remove leading substring from row itemSet and attach as child
// of dummy parent node
currentItemSet = realloc4(currentItemSet,leadingSubString);
newDummyParentRef.childRef = createPtreeNode(currentItemSet,
itemSetLength);
// Remove leading substring from current node itemSet and place
// current node as sibling of new node
linkRef.itemSet=realloc4(linkRef.itemSet,leadingSubString);
newDummyParentRef.childRef.siblingRef = linkRef;
// Check whether any siblings need to be "moved up"
newDummyParentRef.siblingRef=linkRef.siblingRef;
linkRef.siblingRef=null;
}
else { // Create new node
PtreeNode newSiblingRef =
createPtreeNode(currentItemSet,itemSetLength);
// Attach existing node as younger sibling
newSiblingRef.siblingRef = linkRef;
// Insert into tree
addSupport2(flag,newSiblingRef,topIndex,oldRef);
}
}
/* AFTER AND SUPERSET */
/** Insets node into P-tree where new itemset is an child of the "current"
node.
@param parentLength the number of elements represented by the parent node
of the current node. Used only when adding new "dummy" nodes to maintain
count of number of nodes of a given size for when the Ptree table is
generated.
@param itemSetLength the number of elements in the current itemSet. Used
only when adding new nodes to maintain count of number of nodes of a given
size for when the Ptree table is generated.
@param linkRef the reference (pointer) to current location in the P-tree.
@param currentItemSet the row itemSet in the input file currently under
consideration. */
private void afterAndSuperset(int parentLength, int itemSetLength,
PtreeNode linkRef, short[] currentItemSet) {
numberOfNodeUpdates++;
linkRef.support = linkRef.support+1; // Increment support
// End of child branch
if (linkRef.childRef == null) {
// Remove existing parent itemSet from currentItemSet
PtreeNode newRef = createPtreeNode(realloc4(currentItemSet,
linkRef.itemSet),itemSetLength);
// Add to existing node as child
linkRef.childRef = newRef;
}
// More children, remove existing current itemSet from row itemSet and
// continue down child branch
else addToPtree(1,parentLength+linkRef.itemSet.length,itemSetLength,
linkRef.childRef,realloc4(currentItemSet,
linkRef.itemSet),0,linkRef);
}
/* AFTER AND NOT SUPERSET */
/** Commeences process of inserting node into P-tree where new itemset is
a younger sibling of the "current" node.
@param flag the type of branch currently under consideration:
code 0 = root, 1 = child, 2 = sibling.
@param parentLength the number of elements represented by the parent node
of the current node. Used only when adding new "dummy" nodes to maintain
count of number of nodes of a given size for when the Ptree table is
generated.
@param itemSetLength the number of elements in the current itemSet. Used
only when adding new nodes to maintain count of number of nodes of a given
size for when the Ptree table is generated.
@param linkRef the reference (pointer) to current location in the P-tree.
@param currentItemSet the row itemSet in the input file currently under
consideration.
@param topIndex the index of the element in the array marking the top level
of the P-tree, used only when inserting new nodes hanging from this top
level otherwise ignored.
@param oldRef the reference (pointer) to the previous location in the P-tree,
used when inserting new nodes. */
private void afterAndNotSuperset(int flag, int parentLength,
int itemSetLength, PtreeNode linkRef, short[] currentItemSet,
int topIndex, PtreeNode oldRef) {
// Test if end of sibling branch, if not continue
if (linkRef.siblingRef == null)
afterAndNotSuperset1(flag,parentLength,itemSetLength,
linkRef,currentItemSet,topIndex,oldRef);
// Not end of sibling branch
else afterAndNotSuperset2(flag,parentLength,itemSetLength,linkRef,
currentItemSet,topIndex,oldRef);
}
/* AFTER AND NOT SUPERSET 1 */
/** Inserts node into P-tree where new itemset is a younger sibling of the
"current" node and there are no more younger siblings on current existing
node therefore new node added as sibling.
@param flag the type of branch currently under consideration:
code 0 = root, 1 = child, 2 = sibling.
@param parentLength the number of elements represented by the parent node
of the current node. Used only when adding new "dummy" nodes to maintain
count of number of nodes of a given size for when the Ptree table is
generated.
@param itemSetLength the number of elements in the current itemSet. Used
only when adding new nodes to maintain count of number of nodes of a given
size for when the Ptree table is generated.
@param linkRef the reference (pointer) to current location in the P-tree.
@param currentItemSet the row itemSet in the input file currently under
consideration.
@param topIndex the index of the element in the array marking the top level
of the P-tree, used only when inserting new nodes hanging from this top
level otherwise ignored.
@param oldRef the reference (pointer) to the previous location in the P-tree,
used when inserting new nodes. */
private void afterAndNotSuperset1(int flag, int parentLength,
int itemSetLength, PtreeNode linkRef, short[] currentItemSet,
int topIndex, PtreeNode oldRef) {
// Find leading common ellements in row and sibling itemSets if any
short[] leadingSubString = checkForLeadingSubString(currentItemSet,
linkRef.itemSet);
// If leading common ellements exists create new node representing
// common elements and add current itemSet as child of this new node.
// Otherwise add new itemSet as elder sibling.
if (leadingSubString != null) {
// Create new parent representing subset
PtreeNode newParent = createPtreeNode(leadingSubString,
leadingSubString.length+parentLength-1);
// Add support made up of existing support + 1 for current row
newParent.support = linkRef.support+1;
// Insert new parent node into tree
addSupport2(flag,newParent,topIndex,oldRef);
// Remove leading substring from current existing node
linkRef.itemSet = realloc4(linkRef.itemSet,leadingSubString);
// Attach existing branch as child of new parent and new node
// as sibling
newParent.childRef = linkRef;
linkRef.siblingRef = createPtreeNode(realloc4(currentItemSet,
leadingSubString),itemSetLength);
}
// No leading substring
else linkRef.siblingRef = createPtreeNode(currentItemSet,
itemSetLength);
}
/* AFTER AND NOT SUPERSET 2 */
/** Inserts node into P-tree where new itemset is a younger sibling of the
"current" node and there are more younger sibling on current existing
node.
Possibility:
@param flag the type of branch currently under consideration:
code 0 = root, 1 = child, 2 = sibling.
@param parentLength the number of elements represented by the parent node
of the current node. Used only when adding new "dummy" nodes to maintain
count of number of nodes of a given size for when the Ptree table is
generated.
@param itemSetLength the number of elements in the current itemSet. Used
only when adding new nodes to maintain count of number of nodes of a given
size for when the Ptree table is generated.
@param linkRef the reference (pointer) to current location in the P-tree.
@param currentItemSet the row itemSet in the input file currently under
consideration.
@param topIndex the index of the element in the array marking the top level
of the P-tree, used only when inserting new nodes hanging from this top
level otherwise ignored.
@param oldRef the reference (pointer) to the previous location in the P-tree,
used when inserting new nodes. */
private void afterAndNotSuperset2(int flag, int parentLength,
int itemSetLength, PtreeNode linkRef,
short[] currentItemSet, int topIndex, PtreeNode oldRef) {
// Find leading common ellements in row and sibling itemSets if any
short[] leadingSubString = checkForLeadingSubString(currentItemSet,
linkRef.itemSet);
// If leading common ellements exists create new node representing
// common elements and add current itemSet as child of this new node.
// Otherwise add new itemSet as elder sibling.
if (leadingSubString != null) {
// Create new "dummy" parent representing leading common elements
PtreeNode newDummyParentRef = createPtreeNode(leadingSubString,
leadingSubString.length+parentLength-1);
// Add support made up of existing support + 1 for current row
newDummyParentRef.support = linkRef.support+1;
// Insert new parent node into tree
addSupport2(flag,newDummyParentRef,topIndex,oldRef);
// Remove leading substring from current existing node and add
// as child of new parent node
linkRef.itemSet = realloc4(linkRef.itemSet,leadingSubString);
newDummyParentRef.childRef = linkRef;
// Store reference to existing branch of current existing node
// in temporary variable
PtreeNode tempRef = linkRef.siblingRef;
// Removeleading substring from current record
currentItemSet = realloc4(currentItemSet,leadingSubString);
// Create new node representing row and add as sibling of
// current node
linkRef.siblingRef = createPtreeNode(currentItemSet,itemSetLength);
//Now add in previous siblings
newDummyParentRef.siblingRef=tempRef;
}
// Otherwise carry on along sibling branch
else addToPtree(2,parentLength,itemSetLength,linkRef.siblingRef,
currentItemSet,0,linkRef);
}
/* ------ ADD SUPPORT 2 ------ */
/** Adds new node where "before and subset" or "after and not superset".
*/
public void createPtreeTable() {
// Set up array of arrays
for (int index=1;index