/* ------------------------------------------------------------------------- */ /* */ /* TOTAL SUPPORT TREE */ /* */ /* Frans Coenen */ /* */ /* 10 January 2003 */ /* (Revised 23/1/3003, 8/2/2003, 18/3/2003, 3/3/2003. 7/4/2004) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* ------------------------------------------------------------------------- */ /* Structure: AssocRuleMining | +-- TotalSupportTree To compile: javac AprioriTsortedPrunedApp.java To run */ /* Java packages */ import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; /** Methods concerned with the generation, processing and manipulation of T-tree data storage structures used to hold the total support counts for large itemsets. @author Frans Coenen @version 3 July 2003 */ public class TotalSupportTree extends AssocRuleMining { /* ------ FIELDS ------ */ // Data structures /** The reference to start of t-tree. */ protected TtreeNode[] startTtreeRef; /** The serialisation array (used for distributed ARM applications when sending T-trees from one processor to another via a JavaSpace). */ //protected int[] serializationArray; /** The marker to the "current" location in the serialisation array.
initialised to zero. */
//protected int serializationRef = 0;
// Constants
/** The maximum number of frequent sets that may be generated. */
protected final int MAX_NUM_FREQUENT_SETS = 80000;
// Other fields
/** The next level indicator flag: set to true if new level
generated and by default. */
protected boolean nextLevelExists = true ;
/** Instance of the class RuleList */
protected RuleList currentRlist = null;
// Diagnostics
/** The number of frequent sets (nodes in t-tree with above minimum
support) generated so far. */
protected int numFrequentsets =0;
/** The number of updates required to generate the T-tree. */
protected long numUpdates = 0l;
/** Time to generate P-tree. */
protected String duration = null;
/* ------ CONSTRUCTORS ------ */
/** Processes command line arguments. */
public TotalSupportTree(String[] args) {
super(args);
// Create RuleList object for later usage
currentRlist = new RuleList();
}
/** With argument from existing instance of class AssocRuleMining. */
/*public TotalSupportTree(AssocRuleMining armInstance) {
numRows = armInstance.numRows;
numCols = armInstance.numCols;
support = armInstance.support;
confidence = armInstance.confidence;
dataArray = armInstance.dataArray;
minSupport = armInstance.minSupport;
numOneItemSets = armInstance.numOneItemSets;
} */
/** Default constructor */
/*public TotalSupportTree() {
} */
/* ------ METHODS ------ */
/*----------------------------------------------------------------------- */
/* */
/* T-TREE BUILDING METHODS */
/* */
/*----------------------------------------------------------------------- */
/* CREATE TOTAL SUPPORT TREE */
/** Commences process of generating a total support tree (T-tree). */
public void createTotalSupportTree() {
System.out.println("APRIORI-T WITH X-CHECKING\n" +
"-------------------------");
System.out.println("Minimum support threshold = " +
twoDecPlaces(support) + "% " + "(" +
twoDecPlaces(minSupport) + " records)");
// If no data (possibly as a result of an order and pruning operation)
// return
if (numOneItemSets==0) return;
// Set RuleList conversion and reconversion values so that they are the
// same as those inherrited from AssocRuleMining class
currentRlist.setReconversionArrayRefs(conversionArray,reconversionArray);
// Initilise T-tree data structure and diagnostic counters.
startTtreeRef = null;
numFrequentsets = 0;
numUpdates = 0l;
double time1 = (double) System.currentTimeMillis();
// Create Top level of T-tree (First pass of dataset). Defined in
// in TotlaSupportTree class.
createTtreeTopLevel();
// Generate level 2
generateLevel2();
// Further passes of the dataset
createTtreeLevelN();
duration = getDuration(time1,(double) System.currentTimeMillis());
}
/* CREATE T-TREE TOP LEVEL */
/** Generates level 1 (top) of the T-tree. */
protected void createTtreeTopLevel() {
// Dimension and initialise top level of T-tree
startTtreeRef = new TtreeNode[numOneItemSets+1];
for (int index=1;index<=numOneItemSets;index++)
startTtreeRef[index] = new TtreeNode();
// Add support for each 1 itemset
createTtreeTopLevel2();
// Prune top level, setting any unsupport 1 itemsets to null
pruneLevelN(startTtreeRef,1);
}
/* CREATE T-TREE 2nd LEVEL */
/** Adds supports to level 1 (top) of the T-tree. */
protected void createTtreeTopLevel2() {
// Loop through data set record by record and add support for each
// 1 itemset
for (int index1=0;index1
Where all nodes are supported and we wish to add the third level we would
walk the tree and attempt to add new nodes to every level 2 node found.
Having found the correct level we step through starting from B (we cannot
add a node to A), so in this case there is only one node from which a level
3 node may be attached.
@param linkRef the reference to the current sub-branch of T-tree (start at
top of tree).
@param level the level marker, set to the required level at the start and
then decremented by 1 on each recursion.
@param itemSet the current itemset under consideration. */
protected void generateLevelN(TtreeNode[] linkRef, int level,
short[] itemSet) {
int index1;
int localSize = linkRef.length;
// Correct level
if (level == 1) {
// Step through T-tree array in current branch at current level
for (index1=2;index1
where we wish to add a level 3 node to node (B), i.e. the node {A}, we
would proceed as follows:
Example 2, given:
where we wish to add a level 4 node (A) to (B) this would represent the
complete label {D,C,B,A}, the N-1 subsets will then be {{D,C,B},{D,C,A},
{D,B,A} and {C,B,A}}. We know the first two are supported becuase they are
contained in the current sub-branch of the T-tree, {D,B,A} and {C,B,A} are
not.
@param parentRef the reference to the level in the sub-branch of the T-tree
under consideration.
@param endIndex the index of the current node under consideration.
@param itemSet the complete label represented by the current node (required
to generate further itemsets to be X-checked). */
protected void generateNextLevel(TtreeNode[] parentRef, int endIndex,
short[] itemSet) {
parentRef[endIndex].childRef = new TtreeNode[endIndex]; // New level
short[] newItemSet;
// Generate a level in Ttree
TtreeNode currentNode = parentRef[endIndex];
// Loop through parent sub-level of siblings upto current node
for (int index=1;index itemSet1 = first two items in current item set
itemSet2 = remainder of items in current item set
Example 1:
Example 2:
Operates in a recursive manner.
Example 1: Given --- sofarSet=null,
startIndex=0, endIndex=2, itemSet1 = {B,A} and itemSet2 = {C}
Example 2: Given --- sofarSet=null,
startIndex=0, endIndex=2, itemSet1 = {B,A} and itemSet2 = {C,D}
Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given T-tree level (set to 1 at
start).
@param lastIndex the end index of the current T-tree level.
@param linRef the reference to the current T-tree level.
@return returns true if itemset found and false otherwise. */
private boolean findItemSetInTtree2(short[] itemSet, int index,
int lastIndex, TtreeNode[] linkRef) {
// Attribute at "index" in item set exists in Ttree
if (linkRef[itemSet[index]] != null) {
// If attribute at "index" is last element of item set then item set
// found
if (index == lastIndex) return(true);
// Otherwise continue
else if (linkRef[itemSet[index]].childRef!=null) {
return(findItemSetInTtree2(itemSet,index+1,lastIndex,
linkRef[itemSet[index]].childRef));
}
// No child branch, item set not in Ttree thererfore return false
else return(false);
}
// Item set not in Ttree
else return(false);
}
/* GET SUPPORT FOT ITEM SET IN T-TREE */
/** Commences process for finding the support value for the given item set
in the T-tree. Used when generating Associoation Rules (ARs). Note that
itemsets are stored in reverse order in the T-tree therefore the given
itemset must be processed in reverse.
@param itemSet the given itemset.
@return returns the support value (0 if not found). */
protected int getSupportForItemSetInTtree(short[] itemSet) {
int lastIndex = itemSet.length-1;
// Last element of itemset in Ttree (Note: Ttree itemsets stored in
// reverse)
if (startTtreeRef[itemSet[lastIndex]] != null) {
// If single item set return support
if (lastIndex == 0) return(startTtreeRef[itemSet[0]].support);
// Otherwise continue down branch (if available)
else if (startTtreeRef[itemSet[lastIndex]].childRef!=null) {
return(getSupportForItemSetInTtree2(itemSet,lastIndex-1,
startTtreeRef[itemSet[lastIndex]].childRef));
}
// No child branch, item set not in Ttree thererfore return 0
else return(0);
}
// Item set not in Ttree thererfore return 0
else return(0);
}
/** Returns the support value for the given itemset if found in the T-tree
and 0 otherwise. Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given itemset.
@param linRef the reference to the current T-tree level.
@return returns the support value (0 if not found). */
private int getSupportForItemSetInTtree2(short[] itemSet, int index,
TtreeNode[] linkRef) {
// Element at "index" in item set exists in Ttree
if (linkRef[itemSet[index]] != null) {
// If element at "index" is last element of item set then item set
// found
if (index == 0) return(linkRef[itemSet[0]].support);
// Otherwise continue down branch (if available)
else if (linkRef[itemSet[index]].childRef!=null) {
return(getSupportForItemSetInTtree2(itemSet,index-1,
linkRef[itemSet[index]].childRef));
}
// No child branch, item set not in Ttree thererfore return 0
else return(0);
}
// Item set not in Ttree thererfore return 0
else return(0);
}
/*----------------------------------------------------------------------- */
/* */
/* ASSOCIATION RULE (AR) GENERATION */
/* */
/*----------------------------------------------------------------------- */
/* GENERATE ASSOCIATION RULES */
/** Initiates process of generating Association Rules (ARs) from a
T-tree. */
public void generateARs() {
// Command line interface output
System.out.println("GENERATE ARs:\n-------------");
// Set rule data structure to null
currentRlist.startRulelist = null;
// Generate
generateARs2();
}
/** Loops through top level of T-tree as part of the AR generation
process. */
private void generateARs2() {
// Loop
for (int index=1;index <= numOneItemSets;index++) {
if (startTtreeRef[index] !=null) {
if (startTtreeRef[index].support >= minSupport) {
short[] itemSetSoFar = new short[1];
itemSetSoFar[0] = (short) index;
generateARs(itemSetSoFar,index,
startTtreeRef[index].childRef);
}
}
}
}
/* GENERATE ASSOCIATION RULES */
/** Continues process of generating association rules from a T-tree by
recursively looping through T-tree level by level.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array lavel in the T-tree.
@param linkRef the reference to the current array lavel in the T-tree. */
protected void generateARs(short[] itemSetSofar, int size,
TtreeNode[] linkRef) {
// If no more nodes return
if (linkRef == null) return;
// Otherwise process
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
// Temp itemset
short[] tempItemSet = realloc2(itemSetSofar,(short) index);
// Generate ARs for current large itemset
generateARsFromItemset(tempItemSet,linkRef[index].support);
// Continue generation process
generateARs(tempItemSet,index,linkRef[index].childRef);
}
}
}
}
/* GENERATE ASSOCIATION RULES */
/** Generates all association rules for a given large item set found in a
T-tree structure. Called from generateARs method.
@param itemSet the given large itemset.
@param support the associated support value for the given large itemset. */
private void generateARsFromItemset(short[] itemSet, double support) {
// Determine combinations
short[][] combinations = combinations(itemSet);
// Loop through combinations
for(int index=0;index
Not the same as counting the number of nodes created as some of these may
have been pruned.
@return the number of nodes. */
/*protected int countNumberOfTtreeNodes() {
int counter = 0;
// Loop
for (int index=1; index < startTtreeRef.length; index++) {
if (startTtreeRef[index] !=null) {
counter++;
counter = countNumberOfTtreeNodes(counter,
startTtreeRef[index].childRef);
}
}
// Return
return(counter);
} */
/* COUNT T-TREE NODES IN N SUB-BRANCHES*/
/** Commences process of counting the number of nodes in a sequence of
T-tree sub-branches.
@return the number of nodes. */
/*private int countNumTtreeNodesInNbranches(int startIndex, int endIndex) {
int counter = 0;
// Loop
for (int index=startIndex; index <= endIndex; index++) {
if (startTtreeRef[index] !=null) {
counter++;
counter = countNumberOfTtreeNodes(counter,
startTtreeRef[index].childRef);
}
}
// Return
return(counter);
} */
/** Continues process of counting number of nodes in a T-tree.
@param counter the count sofar.
@param linkref the reference to the current location in the T-tree.
@return the updated count. */
/*private int countNumberOfTtreeNodes(int counter,TtreeNode[] linkRef) {
// Check for empty branch/sub-branch.
if (linkRef == null) return(counter);
// Loop through current level of branch/sub-branch.
for (int index=1;index Operates in a recursive manner.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array lavel in the T-tree.
@param linkRef the reference to the current array lavel in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSets(int number, String itemSetSofar, int size,
TtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
itemSetSofar = itemSetSofar + " ";
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
System.out.println("[" + number + "] {" + itemSetSofar +
index +"} = " + linkRef[index].support);
String newitemSet = itemSetSofar +
new Integer(index).toString();
number = outputFrequentSets(number + 1,newitemSet,index,
linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/* OUTPUT FREQUENT SETS (GUI VERSION) */
/** Commences the process of outputting the frequent sets contained in
the T-tree to a rext area.
@param the text area. */
public void outputFrequentSets(JTextArea textArea) {
int number = 1;
textArea.append("FREQUENT (LARGE) ITEM SETS:\n" +
"---------------------------\n");
// Loop
for (int index=1; index <= numOneItemSets; index++) {
if (startTtreeRef[index] !=null) {
if (startTtreeRef[index].support >= minSupport) {
textArea.append("[" + number + "] {" + index + "} = " +
startTtreeRef[index].support + "\n");
number = outputFrequentSets(textArea,number+1,
new Integer(index).toString(),
index,startTtreeRef[index].childRef);
}
}
}
// End
textArea.append("\n");
}
/** Outputs T-tree frequent sets. Operates in a recursive manner.
@param the text area.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array lavel in the T-tree.
@param linkRef the reference to the current array lavel in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSets(JTextArea textArea, int number,
String itemSetSofar, int size, TtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
itemSetSofar = itemSetSofar + " ";
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
textArea.append("[" + number + "] {" + itemSetSofar +
index +"} = " + linkRef[index].support + "\n");
String newitemSet = itemSetSofar +
new Integer(index).toString();
number = outputFrequentSets(textArea,number + 1,newitemSet,
index,linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/* ------------------------------ */
/* 4. OUTPUT NUMBER FREQUENT SETS */
/* ------------------------------ */
/** Commences the process of counting and outputing number of supported
nodes in the T-tree. A supported set is assumed to be a non null node in
the T-tree. */
public void outputNumFreqSets() {
// If emtpy tree (i.e. no supported sets) do nothing
if (startTtreeRef== null) System.out.println("Number of frequent " +
"sets = 0");
// Otherwise count and output
else System.out.println("Number of frequent sets = " +
countNumFreqSets());
}
/* COUNT NUMBER OF FRQUENT SETS */
/** Commences process of counting the number of frequent (large/supported
sets conayoned in the T-tree. */
protected int countNumFreqSets() {
// If emtpy tree return 0
if (startTtreeRef == null) return(0);
// Otherwise loop through T-tree starting with top level
int num=0;
for (int index=1; index <= numOneItemSets; index++) {
// Check for null valued top level Ttree node.
if (startTtreeRef[index] !=null) {
if (startTtreeRef[index].support >= minSupport)
num = countNumFreqSets(index,
startTtreeRef[index].childRef,num+1);
}
}
// Return
return(num);
}
/** Counts the number of supported nodes in a sub branch of the T-tree.
@param size the length/size of the current array lavel in the T-tree.
@param linkRef the reference to the current array lavel in the T-tree.
@param num the number of frequent serts sofar. */
protected int countNumFreqSets(int size, TtreeNode[] linkRef, int num) {
if (linkRef == null) return(num);
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport)
num = countNumFreqSets(index,
linkRef[index].childRef,num+1);
}
}
// Return
return(num);
}
/* --------------------------------------------------- */
/* 5. OUTPUT NUMBER OF FREQUENT SETS PER T-TREE BRANCH */
/* --------------------------------------------------- */
/** Outputs the number of supported sets per T-tree branch descending from
the top-level of the tree. Used for diagnostic purposes. */
public void outputNumFreqSetsPerBranch() {
System.out.println("Number of frequent sets per branch");
for (int index=1; index <= numOneItemSets; index++) {
if (startTtreeRef[index] !=null) {
System.out.println("(" + index + ")" + countNumFreqSets(index,
startTtreeRef[index].childRef,1));
}
}
}
/* --------------------------- */
/* 6. OUTPUT T-TREE STATISTICS */
/* --------------------------- */
/** Commences the process of outputting T-tree statistics (for diagnostic
purposes): (a) Storage, (b) Number of nodes on P-tree, (c) number of
partial support increments (updates) and (d) generation time. */
public void outputTtreeStats() {
System.out.println("T-TREE STATISTICS\n-----------------");
System.out.println(calculateStorage() + " (Bytes) storage");
System.out.println(TtreeNode.getNumberOfNodes() + " nodess");
System.out.println(countNumFreqSets() + " frequent sets");
System.out.println(numUpdates + " support value increments");
System.out.println(duration);
}
/** Commences the process of outputting T-tree statistics:GUI version.
@param the text area. */
public void outputTtreeStats(JTextArea textArea) {
textArea.append("T-TREE STATISTICS\n-----------------\n");
textArea.append(calculateStorage() + " (Bytes) storage\n");
textArea.append(TtreeNode.getNumberOfNodes() + " nodess\n");
textArea.append(countNumFreqSets() + " frequent sets\n");
textArea.append(numUpdates + " support value increments\n");
textArea.append(duration + "\n");
}
/* --------------------------- */
/* 7. OUTPUT NUMBER OF UPDATES */
/* --------------------------- */
/** Commences the process of determining and outputting the storage
requirements (in bytes) for the T-tree */
/** Outputs the number of update and number of nodes created during the
generation of the T-tree (the later is not the same as the number of
supported nodes). */
public void outputNumUpdates() {
System.out.println("Number of Nodes created = " +
TtreeNode.getNumberOfNodes());
System.out.println("Number of Updates = " + numUpdates);
}
/* ----------------- */
/* 8. OUTPUT STORAGE */
/* ----------------- */
/** Commences the process of determining and outputting the storage
requirements (in bytes) for the T-tree. Example: Given ---
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
(A) ----- (D)
|
|
(A) ----- (B) ----- (C)
|
|
(A) ----- (B)
currentItemSet = {A,B,C}
itemSet1 = {B,A} (change of ordering)
size = {A,B,C}-2 = 1
itemSet2 = {C} (currentItemSet with first two elements removed)
calculate combinations between {B,A} and {C}
currentItemSet = {A,B,C,D}
itemSet1 = {B,A} (change of ordering)
itemSet2 = {C,D} (currentItemSet with first two elements removed)
calculate combinations between {B,A} and {C,D}
@param currentItemSet the given itemset. */
protected boolean testCombinations(short[] currentItemSet) {
// No need to test 1- and 2-itemsets
if (currentItemSet.length < 3) return(true);
// Creat itemSet1 (note ordering)
short[] itemSet1 = new short[2];
itemSet1[0] = currentItemSet[1];
itemSet1[1] = currentItemSet[0];
// Creat itemSet2
int size = currentItemSet.length-2;
short[] itemSet2 = removeFirstNelements(currentItemSet,2);
// Calculate combinations
return(combinations(null,0,2,itemSet1,itemSet2));
}
/* COMBINATIONS */
/** Determines the cardinality N combinations of a given itemset and then
checks whether those combinations are supported in the T-tree.
itemSet2.length = 1
endIndex = 2 greater than itemSet2.length if condition succeeds
tesSet = null+{B,A} = {B,A}
retutn true if {B,A} supported and null otherwise
endindex not greater than length {C,D}
go into loop
tempSet = {} + {C} = {C}
combinations with --- sofarSet={C}, startIndex=1,
endIndex=3, itemSet1 = {B,A} and itemSet2 = {C}
endIndex greater than length {C,D}
testSet = {C} + {B,A} = {C,B,A}
tempSet = {} + {D} = {D}
combinations with --- sofarSet={D}, startIndex=1,
endIndex=3, itemSet1 = {B,A} and itemSet2 = {C}
endIndex greater than length {C,D}
testSet = {D} + {B,A} = {D,B,A}
@param sofarSet The combination itemset generated so far (set to null at
start)
@param startIndex the current index in the given itemSet2 (set to 0 at
start).
@param endIndex The current index of the given itemset (set to 2 at start)
and incremented on each recursion until it is greater than the length of
itemset2.
@param itemSet1 The first two elements (reversed) of the totla label for the
current item set.
@param itemSet2 The remainder of the current item set.
*/
private boolean combinations(short[] sofarSet, int startIndex,
int endIndex, short[] itemSet1, short[] itemSet2) {
// At level
if (endIndex > itemSet2.length) {
short[] testSet = append(sofarSet,itemSet1);
// If testSet exists in the T-tree sofar then it is supported
return(findItemSetInTtree(testSet));
}
// Otherwise
else {
short[] tempSet;
for (int index=startIndex;index
{1,2,3}
{1,2,3}
{1,2,3}
{1,2,3}
{1,2,3}
This will produce a T-tree as shown below:
+---+---+---+---+
| 0 | 1 | 2 | 3 |
+---+---+---+---+
| | |
| | +-----------+
| | |
| +---+ +---+---+---+
| | | 0 | 1 | 2 |
( 5 ) +---+---+ +---+---+---+
(nul) | 0 | 1 | | |
+---+---+ | +----+
| | |
| | +---+---+
( 5 ) | | 0 + 1 |
(nul) ( 5 ) +---+---+
(nul) |
|
( 5 )
(nul)
0 elements require 4 bytes of storage, null nodes (not shown above) 4 bytes
of storage, others 12 bytes of storage. */
public void outputStorage() {
// If emtpy tree (i.e. no supported serts) do nothing
if (startTtreeRef == null) return;
/* Otherwise calculate stoarge */
System.out.println("T-tree Storage = " + calculateStorage() +
" (Bytes)");
}
/* CALCULATE STORAGE */
/** Commences process of claculating storage requirements for T-tree. */
protected int calculateStorage() {
// If emtpy tree (i.e. no supported serts) return 0
if (startTtreeRef == null) return(0);
/* Step through top level */
int storage = 4; // For element 0
for (int index=1; index <= numOneItemSets; index++) {
if (startTtreeRef[index] !=null) storage = storage + 12 +
calculateStorage(0,startTtreeRef[index].childRef);
else storage = storage+4;
}
// Return
return(storage);
}
/** Calculate storage requirements for a sub-branch of the T-tree.
@param localStorage the storage as calculated sofar (set to 0 at start).
@param linkRef the reference to the current sub-branch of the T-tree. */
private int calculateStorage(int localStorage, TtreeNode[] linkRef) {
if (linkRef == null) return(0);
for (int index=1; index < linkRef.length; index++) {
if (linkRef[index] !=null) localStorage = localStorage + 12 +
calculateStorage(0,linkRef[index].childRef);
else localStorage = localStorage + 4;
}
/* Return */
return(localStorage+4); // For element 0
}
}