/* ------------------------------------------------------------------------- */ /* */ /* TOTAL SUPPORT TREE */ /* */ /* Frans Coenen */ /* */ /* 10 January 2003 */ /* (Revised 23/1/2003, 8/2/2003, 18/3/2003, 3/3/2003, 7/4/2004, 19/1/2005, */ /* 3/2/2006, 31/5/2006) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* ------------------------------------------------------------------------- */ /* Structure: AssocRuleMining | +-- TotalSupportTree */ //package lucsKDD_ARM; /* Java packages */ import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; /** Methods concerned with the generation, processing and manipulation of T-tree data storage structures used to hold the total support counts for large itemsets. @author Frans Coenen @version 3 July 2003 */ public class TotalSupportTree extends AssocRuleMining { /* ------ FIELDS ------ */ // Data structures /** The reference to start of t-tree. */ protected TtreeNode[] startTtreeRef; /** The serialisation array (used for distributed ARM applications when sending T-trees from one processor to another via a JavaSpace). */ protected int[] serializationArray; /** The marker to the "current" location in the serialisation array.
initialised to zero. */
protected int serializationRef = 0;
// Constants
/** The maximum number of frequent sets that may be generated. */
protected final int MAX_NUM_FREQUENT_SETS = 500000;
// Other fields
/** The next level indicator flag: set to true if new level
generated and by default. */
protected boolean nextLevelExists = true ;
/** Number of levels in T-tree. */
protected int numLevelsInTtree = 0;
// Diagnostics
/** The number of frequent sets (nodes in t-tree with above minimum
support) generated so far. */
protected int numFrequentsets =0;
/** The number of updates required to generate the T-tree. */
protected long numUpdates = 0l;
/** Time to generate T-tree. */
protected String duration = null;
/* ------ CONSTRUCTORS ------ */
/** One argument constructor with command line arguments to be process.
@param args the command line arguments (array of String instances). */
public TotalSupportTree(String[] args) {
super(args);
}
/** One argument constructior with argument from existing instance of
class AssocRuleMining.
@param armInstance the given instance of the AssocRuleMining
class. */
public TotalSupportTree(AssocRuleMining armInstance) {
super(armInstance);
numRows = armInstance.numRows;
numCols = armInstance.numCols;
support = armInstance.support;
confidence = armInstance.confidence;
dataArray = armInstance.dataArray;
minSupport = armInstance.minSupport;
numOneItemSets = armInstance.numOneItemSets;
conversionArray = armInstance.conversionArray;
reconversionArray = armInstance.reconversionArray;
}
/** Default constructor */
public TotalSupportTree() {
}
/* ------ METHODS ------ */
/*----------------------------------------------------------------------- */
/* */
/* T-TREE BUILDING METHODS */
/* */
/*----------------------------------------------------------------------- */
/* CREATE TOTAL SUPPORT TREE */
/** Commences start process of generating a total support tree (T-tree). */
public void createTotalSupportTree() {
System.out.println("APRIORI-T WITH X-CHECKING\n" +
"-------------------------");
System.out.println("Minimum support threshold = " +
twoDecPlaces(support) + "% " + "(" +
twoDecPlaces(minSupport) + " records)");
// Continue
createTotalSupportTree2();
}
/** Commences pstart rocess of generating a total support tree (T-tree): GUI
version.
@param textArea the given instance of the JTextArea class. */
public void createTotalSupportTree(JTextArea textArea) {
textArea.append("Minimum support threshold = " + support + "% " +
"(" + minSupport + " records)\n");
// Continue
createTotalSupportTree2();
}
/** Continues start process of generating a total support tree (T-tree). */
private void createTotalSupportTree2() {
// Set number of t-tree nodes to zero (this is a static field so will
// not be reset in repeat calls to the T-tree constructor).
TtreeNode.setNumberOfNodesFieldToZero();
// If no data (possibly as a result of an order and pruning operation)
// return
if (numOneItemSets==0) return;
// Initilise T-tree data structure and diagnostic counters.
startTtreeRef = null;
numFrequentsets = 0;
numUpdates = 0l;
double time1 = (double) System.currentTimeMillis();
// Create Top level of T-tree (First pass of dataset). Defined in
// in TotlaSupportTree class.
createTtreeTopLevel();
// Generate level 2
generateLevel2();
// Further passes of the dataset
createTtreeLevelN();
duration = getDuration(time1,(double) System.currentTimeMillis());
}
/* CREATE T-TREE TOP LEVEL */
/** Generates level 1 (top) of the T-tree. */
protected void createTtreeTopLevel() {
// Dimension and initialise top level of T-tree
startTtreeRef = new TtreeNode[numOneItemSets+1];
for (int index=1;index<=numOneItemSets;index++)
startTtreeRef[index] = new TtreeNode();
// Add support for each 1 itemset
createTtreeTopLevel2();
// Prune top level, setting any unsupported 1-itemsets to null
pruneLevelN(startTtreeRef,1);
}
/* CREATE T-TREE 2nd LEVEL */
/** Adds supports to level 1 (top) of the T-tree. */
protected void createTtreeTopLevel2() {
numLevelsInTtree = 1;
// Loop through data set record by record and add support for each
// 1 itemset
for (int index1=0;index1
Where all nodes are supported and we wish to add the third level we would
walk the tree and attempt to add new nodes to every level 2 node found.
Having found the correct level we step through starting from B (we cannot
add a node to A), so in this case there is only one node from which a level
3 node may be attached.
@param linkRef the reference to the current sub-branch of T-tree (start at
top of tree).
@param level the level marker, set to the required level at the start and
then decremented by 1 on each recursion.
@param itemSet the current itemset (T-tree node) under consideration. */
protected void generateLevelN(TtreeNode[] linkRef, int level,
short[] itemSet) {
int localSize = linkRef.length;
// Correct level
if (level == 1) {
// Step through T-tree array in current branch at current level
for (int index=2;index
where we wish to add a level 3 node to node (B), i.e. the node {A}, we
would proceed as follows:
Example 2, given:
where we wish to add a level 4 node (A) to (B) this would represent the
complete label {D,C,B,A}, the N-1 subsets will then be {{D,C,B},{D,C,A},
{D,B,A} and {C,B,A}}. We know the first two are supported because they are
contained in the current sub-branch of the T-tree, {D,B,A} and {C,B,A} are
not.
@param parentRef the reference to the level in the sub-branch of the T-tree
under consideration.
@param endIndex the index of the current node under consideration.
@param itemSet the complete label represented by the current node (required
to generate further itemsets to be X-checked). */
protected void generateNextLevel(TtreeNode[] parentRef, int endIndex,
short[] itemSet) {
parentRef[endIndex].childRef = new TtreeNode[endIndex]; // New level
short[] newItemSet;
// Generate a level in Ttree
TtreeNode currentNode = parentRef[endIndex];
// Loop through parent sub-level of siblings upto current node
for (int index=1;index itemSet1 = first two items in current item set
itemSet2 = remainder of items in current item set
Example 1:
Example 2:
Operates in a recursive manner.
Example 1: Given --- sofarSet=null,
startIndex=0, endIndex=2, itemSet1 = {B,A} and itemSet2 = {C}
Example 2: Given --- sofarSet=null,
startIndex=0, endIndex=2, itemSet1 = {B,A} and itemSet2 = {C,D}
Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given T-tree level (set to 1 at
start).
@param lastIndex the end index of the current T-tree level.
@param linRef the reference to the current T-tree level.
@return returns true if itemset found and false otherwise. */
private boolean findItemSetInTtree2(short[] itemSet, int index,
int lastIndex, TtreeNode[] linkRef) {
// Attribute at "index" in item set exists in Ttree
if (linkRef[itemSet[index]] != null) {
// If attribute at "index" is last element of item set then item set
// found
if (index == lastIndex) return(true);
// Otherwise continue
else if (linkRef[itemSet[index]].childRef!=null) {
return(findItemSetInTtree2(itemSet,index+1,lastIndex,
linkRef[itemSet[index]].childRef));
}
// No child branch, item set not in Ttree thererfore return false
else return(false);
}
// Item set not in Ttree
else return(false);
}
/* GET SUPPORT FOT ITEM SET IN T-TREE */
/** Commences process for finding the support value for the given item set
in the T-tree (which is know to exist in the T-tree). Used when
generating Association Rules (ARs). Note that itemsets are stored in
reverse order in the T-tree therefore the given itemset must be processed
in reverse.
@param itemSet the given itemset.
@return returns the support value (0 if not found). */
protected int getSupportForItemSetInTtree(short[] itemSet) {
int endInd = itemSet.length-1;
// Last element of itemset in Ttree (Note: Ttree itemsets stored in
// reverse)
if (startTtreeRef[itemSet[endInd]] != null) {
// If "current index" is 0, then this is the last element (i.e the
// input is a 1 itemset) and therefore item set found
if (endInd == 0) return(startTtreeRef[itemSet[0]].support);
// Otherwise continue down branch
else {
TtreeNode[] tempRef = startTtreeRef[itemSet[endInd]].childRef;
if (tempRef != null) return(getSupForIsetInTtree2(itemSet,
endInd-1,tempRef));
// No further branch therefore rerurn 0
else return(0);
}
}
// Item set not in Ttree thererfore return 0
else return(0);
}
/** Returns the support value for the given itemset if found in the T-tree
and 0 otherwise. Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given itemset.
@param linRef the reference to the current T-tree level.
@return returns the support value (0 if not found). */
private int getSupForIsetInTtree2(short[] itemSet, int index,
TtreeNode[] linkRef) {
// Element at "index" in item set exists in Ttree
if (linkRef[itemSet[index]] != null) {
// If "current index" is 0, then this is the last element of the
// item set and therefore item set found
if (index == 0) return(linkRef[itemSet[0]].support);
// Otherwise continue provided there is a child branch to follow
else if (linkRef[itemSet[index]].childRef != null)
return(getSupForIsetInTtree2(itemSet,index-1,
linkRef[itemSet[index]].childRef));
else return(0);
}
// Item set not in Ttree therefore return 0
else return(0);
}
/* SET CHILD REF TO NULL */
/** Commences process of setting child reference for the indicated node to
null (used with respect to TFPC algorithm). Note that itemsets are
stored in reverse order in the T-tree therefore the given itemset must be
processed in reverse.
@param itemSet the given itemset. */
protected void setChildRefToNull(short[] itemSet) {
// First element of itemset in Ttree (Note: Ttree itemsets stored in
// reverse)
if (startTtreeRef[itemSet[0]] != null) {
int lastIndex = itemSet.length-1;
// If "current index" is 0, then this is the last element (i.e the
// input is a 1 itemset) and therefore item set found
if (lastIndex == 0) startTtreeRef[itemSet[0]] = null;
// Otherwise continue down branch
else if (startTtreeRef[itemSet[0]].childRef != null)
setChildRefToNull(itemSet,1,lastIndex,
startTtreeRef[itemSet[0]].childRef);
}
}
/** Sets the child reference for the indicated node to null.
@param itemSet the given itemset.
@param index the current index in the given itemset.
@param lastIndex the end index of the current T-tree level.
@param linRef the reference to the current T-tree level. */
private void setChildRefToNull(short[] itemSet, int index,
int lastIndex, TtreeNode[] linkRef) {
// Element at "index" in item set exists in Ttree
if (linkRef[itemSet[index]] != null) {
// If attribute at "index" is last element of item set then item set
// found --- set child reference to null
if (index == lastIndex) linkRef[itemSet[index]] = null;
// Otherwise continue
else if (linkRef[itemSet[index]].childRef != null)
setChildRefToNull(itemSet,index+1,lastIndex,
linkRef[itemSet[index]].childRef);
}
}
/*----------------------------------------------------------------------- */
/* */
/* SERIALIZE T-TREE WITHOUT SUPPORT VALUES */
/* */
/*----------------------------------------------------------------------- */
/* SERIALIZE T-TREE WITHOUT SUPPORT VALUES */
/** Commences the process of serializing a T-tree, but without support
values. Used in with respect to distributed application of Toivonen's
negative boarder concept. Three elements per node: (1) label, (2) childRef
and (3) sibling ref. If no "null" childRef or siblingRef then -1 will be
inserted. */
protected void serializeTteeNoSupValues() {
int newIndex=0, linkIndex=0;
final int NODE_SIZE = 3;
// Dimension serialization array and initialise serialization index to 0
int size = countNumberOfTtreeNodes()*NODE_SIZE;
serializationArray = new int[size];
serializationRef =0;
System.out.println("Serialized Ttree (No sup values) = " + (size*4) +
" (bytes)");
// Loop through Ttree
for(int index=1;index Used in
distributed ARM application where the count distribution (CD) approach is
used.
@param level the required level. */
protected void serializeTteeLevelN(int level) {
int newIndex=0, linkIndex=0;
// Dimemsion serialization array
int arraySize = countNumTtreeNodesLevelN(0,level,startTtreeRef) *
(level+1);
serializationArray = new int[arraySize];
System.out.println("Serialized Ttree level = " + (arraySize*4) + " (bytes)");
// Initialise serialization index to 0
serializationRef=0;
// Serialize
serializeTteeLevelN(level,startTtreeRef,null);
}
/** Continues process of serializing all level N nodes of a T-tree.
@param level the required level.
@param linkRef the current location in the T-tree.
@param itemSet the itemSet so far*/
private void serializeTteeLevelN(int level, TtreeNode[] linkRef,
short[] itemSet) {
// No nodes in this sub-branch level?
if (linkRef == null) return;
// If at right level serilize nodes
if (level == 1) {
for(int index=1;index Used in distribute ARM algorithms where a
T-tree has been serialized to allow transmission through a JavaSpace.
@param linkRef the reference to the current level in the given T-tree
branch.
@param size the size of the current level in the given T-tree branch.
@param indexST the index of the current sibling in the serialization array.
@return the revise local T-tree reference. */
private TtreeNode[] mergeSerializationAndTtree(TtreeNode[] linkRef,
int size, int indexST) {
// if no serialised data for this level branch return
if (indexST == -1) return(linkRef);
// If serialised data but no level array in this Ttree Branch create
// new level array
if (linkRef == null) linkRef = new TtreeNode[size];
// Loop through serialization.
while (indexST != -1) {
// set value for current Ttree level array index
int index = serializationArray[indexST];
// Check if node exists, if not create one
if (linkRef[index] == null) {
linkRef[index] = new TtreeNode(serializationArray[indexST+1]);
numUpdates++;
}
// Otherwise update support
else {
linkRef[index].support = linkRef[index].support +
serializationArray[indexST+1];
numUpdates++;
}
// Process child branch
linkRef[index].childRef =
mergeSerializationAndTtree(linkRef[index].childRef,
index,serializationArray[indexST+2]);
// Increment serialization sibling index
indexST = serializationArray[indexST+3];
}
// Return
return(linkRef);
}
/* CONVERT SEIRIALIZATION TO T-TREE (WITHOUT SUPPORT VALUES). */
/** Commences process of converting a serialization representation of a
T-tree (without support values) to a "proper" Ttree. Used in distribute
ARM algorithms where */
protected void serialization2TtreeNoSupValues() {
int indexSibRef = 0;
// Create top level in T-tree
startTtreeRef = new TtreeNode[numOneItemSets+1];
// Process top level of serialization.
while (indexSibRef != -1) {
// set value for current Ttree level array index
int index = serializationArray[indexSibRef];
// Check if node exists, if not create one
if (startTtreeRef[index] == null) {
startTtreeRef[index] = new TtreeNode(0);
}
// Process child branch
startTtreeRef[index].childRef =
serialization2TtreeNoSupValue(startTtreeRef[index].childRef,
index,serializationArray[indexSibRef+1]);
// Increment serialization sibling index
indexSibRef = serializationArray[indexSibRef+2];
}
}
/** Continues process of converting a serialization representation of a
T-tree (without support values) to a "proper" Ttree. Used in
distribute ARM algorithms where a negative boarder T-tree has been
serialized to allow transmission through a JavaSpace.
@param linkRef the reference to the current level in the given T-tree
branch.
@param size the size of the current level in the given T-tree branch.
@param indexST the index of the current sibling in the serialization array.
@return the revise local T-tree reference. */
private TtreeNode[] serialization2TtreeNoSupValue(TtreeNode[] linkRef,
int size, int indexST) {
// if no serialised data for this level branch return
if (indexST == -1) return(linkRef);
// If serialised data but no level array in this Ttree Branch create
// new level array
if (linkRef == null) linkRef = new TtreeNode[size];
// Loop through serialization.
while (indexST != -1) {
// set value for current Ttree level array index
int index = serializationArray[indexST];
// Check if node exists, if not create one
if (linkRef[index] == null) {
linkRef[index] = new TtreeNode(0);
numUpdates++;
}
// Process child branch
linkRef[index].childRef =
serialization2TtreeNoSupValue(linkRef[index].childRef,
index,serializationArray[indexST+1]);
// Increment serialization sibling index
indexST = serializationArray[indexST+2];
}
// Return
return(linkRef);
}
/*----------------------------------------------------------------------- */
/* */
/* ASSOCIATION RULE (AR) GENERATION */
/* */
/*----------------------------------------------------------------------- */
/** Initiates process of generating Association Rules (ARs) by processing
the T-tree and storing rules above minimum confidence threshold in rule
list. */
public void generateARs() {
// Command line interface output
System.out.println("GENERATE ARs (support-confidence framework):\n" +
"--------------------------------------------");
// Set flag and rule data structure to null
supConfFworkFlag = true;
startRulelist = null;
// Generate
generateARs2();
// Reset flag
supConfFworkFlag = false;
}
/* GENERATE ASSOCIATION RULES (SUPPORT-LIFT FRAMEWORK) */
/** Initiates process of generating Association Rules (ARs) by processing
the T-tree and storing rules above minimum confidence threshold in rule
list. */
public void generateARsLift() {
// Command line interface output
System.out.println("GENERATE ARs (support-lift framework):\n" +
"--------------------------------------");
// Set flag and rule data structure to null
supLiftFworkFlag = true;
startRulelist = null;
// Generate
generateARs2();
// Reset flag
supLiftFworkFlag = false;
}
/** Initiaites process of generating Association Rules (ARs) from a T-tree:
GUI version.
@param textArea the given instance of the JTextArea class. */
public void generateARs(JTextArea textArea) {
// GUI output
textArea.append("GENERATE ARs (support-confidence framework):\n" +
"--------------------------------------------\n");
// Set flag and rule data structure to null
supConfFworkFlag = true;
startRulelist = null;
// Set rule data structure to null
startRulelist = null;
// Generate
generateARs2();
// Reset flag
supConfFworkFlag = false;
}
/** Loops through top level of T-tree as part of the AR generation
process. */
protected void generateARs2() {
// Loop
for (short index=1;index <= numOneItemSets;index++) {
if (startTtreeRef[index] !=null) {
if (startTtreeRef[index].support >= minSupport) {
short[] itemSetSoFar = new short[1];
itemSetSoFar[0] = index;
generateARs(itemSetSoFar,index,
startTtreeRef[index].childRef);
}
}
}
}
/* GENERATE ASSOCIATION RULES */
/** Continues process of generating association rules from a T-tree by
recursively looping through T-tree level by level.
@param itemSetSofar the label for a T-tree node as generated sofar.
@param size the length/size of the current array lavel in the T-tree.
@param linkRef the reference to the current array level in the T-tree. */
protected void generateARs(short[] itemSetSofar, int size,
TtreeNode[] linkRef) {
// If no more nodes return
if (linkRef == null) return;
// Otherwise process
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
// Temp itemset
short[] tempItemSet = realloc2(itemSetSofar,(short) index);
// Generate ARs for current large itemset
generateARsFromItemset(tempItemSet,linkRef[index].support);
// Continue generation process
generateARs(tempItemSet,index,linkRef[index].childRef);
}
}
}
}
/* GENERATE ASSOCIATION RULES */
/** Generates all association rules for a given large item set found in a
T-tree structure. Called from generateARs method.
@param itemSet the given large itemset.
@param support the associated support value for the given large itemset. */
private void generateARsFromItemset(short[] itemSet, double support) {
// Determine combinations
short[][] combinations = combinations(itemSet);
// Loop through combinations
for(int index=0;index Used
in connection with the DOC algorithm. Dummy method the body
for which is given in the TotalSupportTreeNegBorder class.
@param newSupportReductionValue the new support reduction value. */
public void setSupportReductionValue(double newValue) {
}
/* GET SUPPORT REDUCTION VALUE */
/** Get value for support reduction value field (the magnitude by which the
support percentage threshold is to be reduced. Abstrat method the body
for which is given in the TotalSupportTreeNegBorder class.
@return the support reduction value. */
public double getSupportReductionValue() {
return(0.1);
}
/* COUNT T-TREE NODES */
/** Commences process of counting the number of nodes in the T-tree.
Not the same as counting the number of nodes created as some of these may
have been pruned.
@return the number of nodes. */
protected int countNumberOfTtreeNodes() {
int counter = 0;
// Loop
for (int index=1; index < startTtreeRef.length; index++) {
if (startTtreeRef[index] !=null) {
counter++;
counter = countNumberOfTtreeNodes(counter,
startTtreeRef[index].childRef);
}
}
// Return
return(counter);
}
/* COUNT T-TREE NODES IN N SUB-BRANCHES*/
/** Commences process of counting the number of nodes in a sequence of
T-tree sub-branches.
@return the number of nodes. */
private int countNumTtreeNodesInNbranches(int startIndex, int endIndex) {
int counter = 0;
// Loop
for (int index=startIndex; index <= endIndex; index++) {
if (startTtreeRef[index] !=null) {
counter++;
counter = countNumberOfTtreeNodes(counter,
startTtreeRef[index].childRef);
}
}
// Return
return(counter);
}
/** Continues process of counting number of nodes in a T-tree.
@param counter the count sofar.
@param linkref the reference to the current location in the T-tree.
@return the updated count. */
private int countNumberOfTtreeNodes(int counter,TtreeNode[] linkRef) {
// Check for empty branch/sub-branch.
if (linkRef == null) return(counter);
// Loop through current level of branch/sub-branch.
for (int index=1;index Operates in a recursive
manner.
@param textArea the given instance of the JTextArea class.
@param number the ID number of a particular node.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param linkRef the reference to the current array lavel in the T-tree. */
private void outputTtree(JTextArea textArea, String number,
String itemSetSofar, TtreeNode[] linkRef) {
// Set output local variables.
int num=1;
number = number + ".";
itemSetSofar = itemSetSofar + " ";
// Check for empty branch/sub-branch.
if (linkRef == null) return;
// Loop through current level of branch/sub-branch.
for (int index=1;index Operates in a recursive manner.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSets(int number, short[] itemSetSofar, int size,
TtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
for (short index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
short[] newItemSet = realloc2(itemSetSofar,index);
System.out.print("[" + number + "]");
outputItemSet(newItemSet);
System.out.println("= " + linkRef[index].support);
number = outputFrequentSets(number + 1,newItemSet,index,
linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/* OUTPUT FREQUENT SETS (GUI VERSION) */
/** Commences the process of outputting the frequent sets contained in
the T-tree (GUI version).
@param textArea the given instance of the JTextArea class. */
public void outputFrequentSets(JTextArea textArea) {
int number = 1;
textArea.append("Format: [N] {I} = S, where N is a sequential " +
"number, I is the item set and S the support.\n");
// Loop
for (short index=1; index <= numOneItemSets; index++) {
if (startTtreeRef[index] !=null) {
if (startTtreeRef[index].support >= minSupport) {
short[] itemSetSofar = new short[1];
itemSetSofar[0] = index;
textArea.append("[" + number + "]");
outputItemSet(textArea,itemSetSofar);
textArea.append("= " + startTtreeRef[index].support + "\n");
number = outputFrequentSets(textArea,number+1,itemSetSofar,
index,startTtreeRef[index].childRef);
}
}
}
// End
System.out.println("\n");
}
/** Outputs T-tree frequent sets (GUI version). Operates in a
recursive manner.
@param textArea the given instance of the JTextArea class.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSets(JTextArea textArea, int number,
short[] itemSetSofar, int size, TtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
for (short index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
short[] newItemSet = realloc2(itemSetSofar,index);
textArea.append("[" + number + "]");
outputItemSet(textArea,newItemSet);
textArea.append("= " + linkRef[index].support + "\n");
number = outputFrequentSets(textArea,number + 1,newItemSet,
index,linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/** Commences the process of outputting the frequent sets contained in
the T-tree as series of schema labels (GUI version).
@param textArea the given instance of the JTextArea class. */
public void outputFrequentSetsSchema(JTextArea textArea) {
int number = 1;
textArea.append("Format: [N] {I} = S, where N is a sequential " +
"number, I is the item set and S the support.\n");
// Loop
for (short index=1; index <= numOneItemSets; index++) {
if (startTtreeRef[index] !=null) {
if (startTtreeRef[index].support >= minSupport) {
short[] itemSetSofar = new short[1];
itemSetSofar[0] = index;
textArea.append("[" + number + "]");
outputItemSetSchema(textArea,itemSetSofar);
textArea.append("= " + startTtreeRef[index].support + "\n");
number = outputFrequentSetsSchema(textArea,number+1,
itemSetSofar,index,startTtreeRef[index].childRef);
}
}
}
// End
System.out.println("\n");
}
/** Outputs T-tree frequent sets (GUI version). Operates in a
recursive manner.
@param textArea the given instance of the JTextArea class.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSetsSchema(JTextArea textArea, int number,
short[] itemSetSofar, int size, TtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
for (short index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
short[] newItemSet = realloc2(itemSetSofar,index);
textArea.append("[" + number + "]");
outputItemSetSchema(textArea,newItemSet);
textArea.append("= " + linkRef[index].support + "\n");
number = outputFrequentSetsSchema(textArea,number + 1,
newItemSet,index,linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/* ------------------------------ */
/* 4. OUTPUT NUMBER FREQUENT SETS */
/* ------------------------------ */
/** Commences the process of counting and outputing number of supported
nodes in the T-tree. A supported set is assumed to be a non null node in
the T-tree. */
public void outputNumFreqSets() {
// If empty tree (i.e. no supported sets) do nothing
if (startTtreeRef== null) System.out.println("Number of frequent " +
"sets = 0");
// Otherwise count and output
else System.out.println("Number of frequent sets = " +
countNumFreqSets());
}
/* COUNT NUMBER OF FRQUENT SETS */
/** Commences process of counting the number of frequent (large/supported)
sets contained in the T-tree. */
protected int countNumFreqSets() {
// If empty tree return 0
if (startTtreeRef == null) return(0);
// Otherwise loop through T-tree starting with top level
int num=0;
for (int index=1; index <= numOneItemSets; index++) {
// Check for null valued top level Ttree node.
if (startTtreeRef[index] !=null) {
if (startTtreeRef[index].support >= minSupport)
num = countNumFreqSets(index,
startTtreeRef[index].childRef,num+1);
}
}
// Return
return(num);
}
/** Counts the number of supported nodes in a sub branch of the T-tree.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree.
@param num the number of frequent sets sofar. */
protected int countNumFreqSets(int size, TtreeNode[] linkRef, int num) {
if (linkRef == null) return(num);
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport)
num = countNumFreqSets(index,
linkRef[index].childRef,num+1);
}
}
// Return
return(num);
}
/* --------------------------------------------------- */
/* 5. OUTPUT NUMBER OF FREQUENT SETS PER T-TREE BRANCH */
/* --------------------------------------------------- */
/** Outputs the number of supported sets per T-tree branch descending from
the top-level of the tree. Used for diagnostic purposes. */
public void outputNumFreqSetsPerBranch() {
System.out.println("Number of frequent sets per branch");
for (int index=1; index <= numOneItemSets; index++) {
if (startTtreeRef[index] !=null) {
System.out.println("(" + index + ")" + countNumFreqSets(index,
startTtreeRef[index].childRef,1));
}
}
}
/* --------------------------- */
/* 6. OUTPUT T-TREE STATISTICS */
/* --------------------------- */
/** Commences the process of outputting T-tree statistics (for diagnostic
purposes): (a) Storage, (b) Number of nodes on P-tree, (c) number of
partial support increments (updates) and (d) generation time. */
public void outputTtreeStats() {
System.out.println("T-TREE STATISTICS\n-----------------");
System.out.println(numLevelsInTtree + " Levels in T-tree");
System.out.println(TtreeNode.getNumberOfNodes() + " Total # nodes " +
"created");
System.out.println(numUpdates + " Total # support value increments");
System.out.println(countNumFreqSets() + " Final # frequent sets");
System.out.println(calculateStorage() + " Total storage (Bytes)" +
" on completion");
System.out.println(duration);
}
/** Commences the process of outputting T-tree statistics (GUI version).
@param textArea the given instance of the JTextArea class. */
public void outputTtreeStats(JTextArea textArea) {
textArea.append(numLevelsInTtree + " Levels in T-tree\n");
textArea.append(TtreeNode.getNumberOfNodes() + " Total # nodes " +
"created\n");
textArea.append(numUpdates + " Total # support value increments\n");
textArea.append(countNumFreqSets() + " Final # frequent sets\n");
textArea.append(calculateStorage() + " Total storage (Bytes)" +
" on completion\n");
}
/* --------------------------- */
/* 7. OUTPUT NUMBER OF UPDATES */
/* --------------------------- */
/** Outputs the number of update and number of nodes created during the
generation of the T-tree (the later is not the same as the number of
supported nodes). */
public void outputNumUpdates() {
System.out.println("Number of T-tree nodes created = " +
TtreeNode.getNumberOfNodes());
System.out.println("Number of T-tree Updates = " + numUpdates);
}
/* ----------------- */
/* 8. OUTPUT STORAGE */
/* ----------------- */
/** Commences the process of determining and outputting the storage
requirements (in bytes) for the T-tree. Example: Given ---
Used for
distributed T-tree applications. */
protected void outputSerializationArray() {
for (int index=0;index Operates in a
recursive manner.
@param number the ID number of a particular node.
@param itemSetSofar the label for a T-tree node as generated sofar.
@param linkRef the reference to the current array level in the T-tree. */
protected void outputSerializedTtree(String number, String itemSetSofar,
int indexSibRef) {
// Set output local variables.
int num=1;
number = number + ".";
itemSetSofar = itemSetSofar + " ";
// Check for empty branch/sub-branch.
if (indexSibRef == -1) return;
// Loop through current level of branch/sub-branch.
while (indexSibRef != -1) {
String label = new
Integer(serializationArray[indexSibRef]).toString();
System.out.println("[" + number + num + "] {" + itemSetSofar +
label + "} = " + serializationArray[indexSibRef+1]);
String newitemSet = itemSetSofar + label;
outputSerializedTtree(number + num,newitemSet,
serializationArray[indexSibRef+2]);
num++;
indexSibRef = serializationArray[indexSibRef+3];
}
}
/* ------------------------------------------------ */
/* 12. OUTPUT SERIALIZED T-TREE (NO SUPPORT VALUES) */
/* ------------------------------------------------ */
/** Commences process of outputting a serialised Level N Ttree structure
contents to screen where support values have been omitted. Used for
distributed T-tree applications. */
protected void outputSerializedTtreeNoSupValues() {
int number = 1;
int indexSibRef = 0;
while (indexSibRef != -1) {
String label = new
Integer(serializationArray[indexSibRef]).toString();
System.out.println("[" + number + "] {" + label + "}");
outputSerializedTtreeNoSupValues(new Integer(number).toString(),
label,serializationArray[indexSibRef+1]);
// Increment node number
number++;
indexSibRef = serializationArray[indexSibRef+2];
}
}
/** Continue process of outputting a serialised T-tree without support
values. Operates in a recursive manner.
@param number the ID number of a particular node.
@param itemSetSofar the label for a T-tree node as generated sofar.
@param linkRef the reference to the current array level in the T-tree. */
private void outputSerializedTtreeNoSupValues(String number, String itemSetSofar,
int indexSibRef) {
// Set output local variables.
int num=1;
number = number + ".";
itemSetSofar = itemSetSofar + " ";
// Check for empty branch/sub-branch.
if (indexSibRef == -1) return;
// Loop through current level of branch/sub-branch.
while (indexSibRef != -1) {
String label = new
Integer(serializationArray[indexSibRef]).toString();
System.out.println("[" + number + num + "] {" + itemSetSofar +
label + "}");
String newitemSet = itemSetSofar + label;
outputSerializedTtreeNoSupValues(number + num,newitemSet,
serializationArray[indexSibRef+1]);
num++;
indexSibRef = serializationArray[indexSibRef+2];
}
}
/* ------------------------------------ */
/* 13. OUTPUT SERIALIZED T-TREE LEVEL N */
/* ------------------------------------ */
/** Commences process of outputting a serialised Level N Ttree structure
contents to screen. Used for distributed T-tree applications.
@param level the required level*/
protected void outputSerializedTtreeLevelN(int level) {
for(int index=0,number=1;index
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
(A) ----- (D)
|
|
(A) ----- (B) ----- (C)
|
|
(A) ----- (B)
currentItemSet = {A,B,C}
itemSet1 = {B,A} (change of ordering)
size = {A,B,C}-2 = 1
itemSet2 = {C} (currentItemSet with first two elements removed)
calculate combinations between {B,A} and {C}
currentItemSet = {A,B,C,D}
itemSet1 = {B,A} (change of ordering)
itemSet2 = {C,D} (currentItemSet with first two elements removed)
calculate combinations between {B,A} and {C,D}
@param currentItemSet the given itemset. */
protected boolean testCombinations(short[] currentItemSet) {
// No need to test 1- and 2-itemsets
if (currentItemSet.length < 3) return(true);
// Create itemSet1 (note ordering)
short[] itemSet1 = new short[2];
itemSet1[0] = currentItemSet[1];
itemSet1[1] = currentItemSet[0];
// Creat itemSet2
int size = currentItemSet.length-2;
short[] itemSet2 = removeFirstNelements(currentItemSet,2);
// Calculate combinations
return(combinations(null,0,2,itemSet1,itemSet2));
}
/* COMBINATIONS */
/** Determines the cardinality N combinations of a given itemset and then
checks whether those combinations are supported in the T-tree.
itemSet2.length = 1
endIndex = 2 greater than itemSet2.length if condition succeeds
tesSet = null+{B,A} = {B,A}
retutn true if {B,A} supported and null otherwise
endindex not greater than length {C,D}
go into loop
tempSet = {} + {C} = {C}
combinations with --- sofarSet={C}, startIndex=1,
endIndex=3, itemSet1 = {B,A} and itemSet2 = {C}
endIndex greater than length {C,D}
testSet = {C} + {B,A} = {C,B,A}
tempSet = {} + {D} = {D}
combinations with --- sofarSet={D}, startIndex=1,
endIndex=3, itemSet1 = {B,A} and itemSet2 = {C}
endIndex greater than length {C,D}
testSet = {D} + {B,A} = {D,B,A}
@param sofarSet The combination itemset generated so far (set to null at
start)
@param startIndex the current index in the given itemSet2 (set to 0 at
start).
@param endIndex The current index of the given itemset (set to 2 at start)
and incremented on each recursion until it is greater than the length of
itemset2.
@param itemSet1 The first two elements (reversed) of the total label for the
current item set.
@param itemSet2 The remainder of the current item set.
*/
private boolean combinations(short[] sofarSet, int startIndex,
int endIndex, short[] itemSet1, short[] itemSet2) {
// At level
if (endIndex > itemSet2.length) {
short[] testSet = append(sofarSet,itemSet1);
// If testSet exists in the T-tree sofar then it is supported
return(findItemSetInTtree(testSet));
}
// Otherwise
else {
short[] tempSet;
for (int index=startIndex;index
{1,2,3}
{1,2,3}
{1,2,3}
{1,2,3}
{1,2,3}
This will produce a T-tree as shown below:
+---+---+---+---+
| 0 | 1 | 2 | 3 |
+---+---+---+---+
| | |
| | +-----------+
| | |
| +---+ +---+---+---+
| | | 0 | 1 | 2 |
( 5 ) +---+---+ +---+---+---+
(nul) | 0 | 1 | | |
+---+---+ | +----+
| | |
| | +---+---+
( 5 ) | | 0 + 1 |
(nul) ( 5 ) +---+---+
(nul) |
|
( 5 )
(nul)
0 elements require 4 bytes of storage, null nodes (not shown above) 4 bytes
of storage, others 12 bytes of storage. */
public void outputStorage() {
// If empty tree (i.e. no supported sets) do nothing
if (startTtreeRef == null) return;
/* Otherwise calculate storage */
System.out.println("T-tree Storage = " + calculateStorage() +
" (Bytes)");
}
/* CALCULATE STORAGE */
/** Commences process of calculating storage requirements for T-tree. */
protected int calculateStorage() {
// If emtpy tree (i.e. no supported sets) return 0
if (startTtreeRef == null) return(0);
/* Step through top level */
int storage = 4; // For element 0
for (int index=1; index <= numOneItemSets; index++) {
if (startTtreeRef[index] !=null) storage = storage + 12 +
calculateStorage(0,startTtreeRef[index].childRef);
else storage = storage+4;
}
// Return
return(storage);
}
/** Calculate storage requirements for a sub-branch of the T-tree.
@param localStorage the storage as calculated sofar (set to 0 at start).
@param linkRef the reference to the current sub-branch of the T-tree. */
private int calculateStorage(int localStorage, TtreeNode[] linkRef) {
if (linkRef == null) return(0);
for (int index=1; index < linkRef.length; index++) {
if (linkRef[index] !=null) localStorage = localStorage + 12 +
calculateStorage(0,linkRef[index].childRef);
else localStorage = localStorage + 4;
}
/* Return */
return(localStorage+4); // For element 0
}
/* ------------------ */
/* 9. OUTPUT DURATION */
/* ------------------ */
/* OUTPUT DURATION (GUI VERSION). */
/** Outputs difference between two given times.
@param textArea the given instance of the JTextArea class.
@param time1 the first time.
@param time2 the second time. */
public void outputDuration(JTextArea textArea, double time1, double time2) {
double duration = (time2-time1)/1000;
textArea.append("Generation time = " + twoDecPlaces(duration) +
" seconds (" + twoDecPlaces(duration/60) + " mins)\n");
}
/* ------------------------------ */
/* 10. OUTPUT SERIALISATION ARRAY */
/* ------------------------------ */
/** Outputs a serialised Ttree as a sequence of integers.