/* ------------------------------------------------------------------------- */ /* */ /* D I C T O T A L S U P P O R T T R E E */ /* */ /* Frans Coenen */ /* */ /* 21 March 2001 */ /* (Revise: 20/7/2006, 19/10/2006) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* ------------------------------------------------------------------------- */ /* Structure: AssocRuleMining | +-- TotalSupportTree | +-- DIC_Ttree */ // Dept Computer Science, University of Liverpool // java -mx240000000 DataMiningApp FileName Cols Rowes /* Java packages */ import java.io.*; import java.util.*; /* Java GUI packages */ import javax.swing.*; class DIC_Ttree extends TotalSupportTree { // ------------------- FIELDS ------------------------ // Data structures /** The reference to start of the DIC T-tree. */ private DIC_TtreeNode[] startDIC_TtreeRef = null; // Fields /** Number of records in a block for the DIC algorithm in terms of a percentage of the total number of records avialble, 25% by default. */ private double blockSize = 25.0; /** Number of records in a block for the DIC algorithm. */ private int numRowsInBlock = 0; /** Total number of blocks, set to zero by default. */ private int totalBlocks = 0; /** Counter for number of passes through data set. */ private int numPassesThroughDataSet=1; /** Strat index for current block. */ private int startIndex = 0; /** Flag set to 0 if not at end of block. */ private boolean notAtEnd = true; /** Instance of JTextArea class. */ private JTextArea textArea = null; /*--------------------------------------------------------------------------*/ /* */ /* CONSTRUCTORS */ /* */ /*--------------------------------------------------------------------------*/ /** One argument constructor with command line arguments to be process. @param armInstance the given instance of the AssocRuleMining class. */ public DIC_Ttree(AssocRuleMining armInstance) { super(armInstance); } /*--------------------------------------------------------------------------*/ /* */ /* T-TREE BUILDING METHODS */ /* */ /*--------------------------------------------------------------------------*/ /* CREATE TOTAL SUPPORT TREE */ /** Commences start process of generating a total support tree (T-tree). */ public void createTotalSupportTree() { System.out.println("DIC WITH T-TREE\n----------------"); // Calculate minimum support in terms of number of rows System.out.println("Minimum support threshold = " + twoDecPlaces(support) + "% " + "(" + twoDecPlaces(minSupport) + " records)"); System.out.println("Block size = " + twoDecPlaces(blockSize) + "%"); System.out.println("Block size = " + numRowsInBlock + " records"); System.out.println("Num. blocks = " + totalBlocks); // Generate tree createTotalSupportTree2(); // Determine maximum number of levels in the DIC T-tree (for output // purposes). numLevelsInTtree = getMaxNumTtreeLevels(); System.out.println("Number of passes through data set = " + numPassesThroughDataSet); } /** Commences start process of generating a total support tree (T-tree), GUI version. @param tArea the given instance of the JTextArea class. */ public void createTotalSupportTree(JTextArea tArea) { textArea = tArea; // Calculate minimum support in terms of number of rows textArea.append("Minimum support threshold = " + twoDecPlaces(support) + "% " + "(" + twoDecPlaces(minSupport) + " records)\n"); textArea.append("Block size = " + twoDecPlaces(blockSize) + "%\n"); textArea.append("Block size = " + numRowsInBlock + " records\n"); textArea.append("Num. blocks = " + totalBlocks + "\n"); // Generate tree createTotalSupportTree2(); // Determine maximum number of levels in the DIC T-tree (for output // purposes). numLevelsInTtree = getMaxNumTtreeLevels(); textArea.append("Number of passes through data set = " + numPassesThroughDataSet + "\n"); } /* CREATE TOTAL SUPPORT TREE 2 */ /** Continues start process of generating a total support tree (T-tree).
Overides similar method in parent class. */ private void createTotalSupportTree2() { // Set number of t-tree nodes to zero (this is a static field so will // not be reset in repeat calls to the T-tree constructor). DIC_TtreeNode.setNumberOfNodesFieldToZero(); // If no data (possibly as a result of an order and pruning operation) // return if (numOneItemSets==0) return; // Initilise DIC T-tree data structure and diagnostic counters. startDIC_TtreeRef = null; numFrequentSets = 0; numUpdates = 0l; double time1 = (double) System.currentTimeMillis(); // Create Top level of DIC T-tree (First pass of dataset). createTtreeTopLevel(); // Procced dicAlgorithm(); } /* CREATE T-TREE TOP LEVEL */ /** Generates level 1 (top) of the T-tree.
Overides method in parent
class. */
protected void createTtreeTopLevel() {
// Dimension and initialise top level of T-tree
startDIC_TtreeRef = new DIC_TtreeNode[numOneItemSets+1];
for (int index=1;index<=numOneItemSets;index++)
startDIC_TtreeRef[index] = new DIC_TtreeNode();
}
/* DIC ALGORITHM */
/** Procced with DIC algorithm. */
private void dicAlgorithm() {
// Loop untill all item sets have been counted
while (notAtEnd) {
// Read first block of records
readBlock();
// Update settings
notAtEnd = false;
updateSettings(startDIC_TtreeRef);
}
}
/* READ BLOCK */
/** Reads a blocvk of data. */
private void readBlock() {
// Calculate end index
int endIndex = startIndex+numRowsInBlock;
// Check end index and adjust if required
if (endIndex > numRows) endIndex=numRows;
// Loop through block of records
for (int index=startIndex;index Used
when generating Association Rules (ARs). Note that itemsets are stored in
reverse order in the T-tree therefore the given itemset must be processed
in reverse.
@param itemSet the given itemset.
@return returns the support value (0 if not found). */
protected int getSupportForItemSetInTtree(short[] itemSet) {
int endInd = itemSet.length-1;
// Last element of itemset in Ttree (Note: Ttree itemsets stored in
// reverse)
if (startDIC_TtreeRef[itemSet[endInd]] != null) {
// If "current index" is 0, then this is the last element (i.e the
// input is a 1 itemset) and therefore item set found
if (endInd == 0) return(startDIC_TtreeRef[itemSet[0]].support);
// Otherwise continue down branch
else {
DIC_TtreeNode[] tempRef = startDIC_TtreeRef[itemSet[endInd]].childRef;
if (tempRef != null) return(getSupForIsetInTtree2(itemSet,
endInd-1,tempRef));
// No further branch therefore rerurn 0
else return(0);
}
}
// Item set not in Ttree thererfore return 0
else return(0);
}
/** Returns the support value for the given itemset if found in the DIC
T-tree and 0 otherwise. Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given itemset.
@param linRef the reference to the current DIC T-tree level.
@return returns the support value (0 if not found). */
private int getSupForIsetInTtree2(short[] itemSet, int index,
DIC_TtreeNode[] linkRef) {
// Element at "index" in item set exists in Ttree
if (linkRef[itemSet[index]] != null) {
// If "current index" is 0, then this is the last element of the
// item set and therefore item set found
if (index == 0) return(linkRef[itemSet[0]].support);
// Otherwise continue provided there is a child branch to follow
else if (linkRef[itemSet[index]].childRef != null)
return(getSupForIsetInTtree2(itemSet,index-1,
linkRef[itemSet[index]].childRef));
else return(0);
}
// Item set not in Ttree therefore return 0
else return(0);
}
/*----------------------------------------------------------------------- */
/* */
/* SET METHODS */
/* */
/*----------------------------------------------------------------------- */
/** Set block size and total number of blocks.
@param bSize the given block size in terms of a percebntage of the
number of available rows. */
public void setBlockSize(double bSize) {
blockSize = bSize;
// Calculate number of rows to ablock
numRowsInBlock = (int) ((double) numRows*(blockSize/100));
// Set total number of blocks
totalBlocks = numRows/numRowsInBlock;
if ((numRows % numRowsInBlock) != 0) totalBlocks++;
}
/*----------------------------------------------------------------------- */
/* */
/* GET METHODS */
/* */
/*----------------------------------------------------------------------- */
/** Gets the reference to the start of the DIC R-tree.
@return the desired reference to the start of the DIC R-tree. */
public DIC_TtreeNode[] getStartDIC_TtreeRef() {
return(startDIC_TtreeRef);
}
/** Gets the maximum number of DIC T-tree levels.
@return the maximum number of levels in the T-tree. */
private int getMaxNumTtreeLevels() {
int maxLevel = 0;
// Check
if (startDIC_TtreeRef==null) return(maxLevel);
// Loop
int curentLevel=1;
for (short index=1;index Operates in a recursive manner.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSets(int number, short[] itemSetSofar, int size,
DIC_TtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
for (short index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
short[] newItemSet = realloc2(itemSetSofar,index);
outputDIC_TtreeNode(new Integer(number).toString(),
newItemSet,linkRef[index].support);
number = outputFrequentSets(number + 1,newItemSet,index,
linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/* OUTPUT FREQUENT SETS */
/** Commences the process of outputting the frequent sets contained in
the DIC T-tree as series of schema labels. */
public void outputFrequentSetsSchema() {
int number = 1;
String s = "Format: [N] {I} = S, where N is a sequential " +
"number, I is the item set and S the support.\n";
if (textArea==null) System.out.print(s);
else textArea.append(s);
// Loop
for (short index=1; index <= numOneItemSets; index++) {
if (startDIC_TtreeRef[index] !=null) {
if (startDIC_TtreeRef[index].support >= minSupport) {
short[] itemSetSofar = new short[1];
itemSetSofar[0] = index;
outputDIC_TtreeNodeSchema(new Integer(number).toString(),
itemSetSofar,startDIC_TtreeRef[index].support);
number = outputFrequentSetsSchema(number+1,itemSetSofar,
index,startDIC_TtreeRef[index].childRef);
}
}
}
// End
System.out.println("\n");
}
/** Outputs DIC T-tree frequent sets. Operates in a recursive manner.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSetsSchema(int number, short[] itemSetSofar,
int size, DIC_TtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
for (short index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
short[] newItemSet = realloc2(itemSetSofar,index);
outputDIC_TtreeNodeSchema(new Integer(number).toString(),
newItemSet,linkRef[index].support);
number = outputFrequentSetsSchema(number + 1,newItemSet,
index,linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/* ------------------------------ */
/* 3. OUTPUT NUMBER FREQUENT SETS */
/* ------------------------------ */
/** Commences the process of counting and outputing number of supported
nodes in the T-tree. A supported set is assumed to be a non null node
in the T-tree. */
public void outputNumFreqSets() {
// If empty tree (i.e. no supported sets) do nothing
if (startDIC_TtreeRef== null) {
String s = "Number of frequent sets = 0\n";
if (textArea==null) System.out.println(s);
else textArea.append(s);
}
// Otherwise count and output
else {
String s = "Number of frequent sets = " + countNumFreqSets() + "\n";
if (textArea==null) System.out.println(s);
else textArea.append(s);
}
}
/* COUNT NUMBER OF FRQUENT SETS */
/** Commences process of counting the number of frequent (large/supported)
sets contained in the DIC T-tree. */
protected int countNumFreqSets() {
// If empty tree return 0
if (startDIC_TtreeRef == null) return(0);
// Otherwise loop through T-tree starting with top level
int num=0;
for (int index=1; index <= numOneItemSets; index++) {
// Check for null valued top level Ttree node.
if (startDIC_TtreeRef[index] !=null) {
if (startDIC_TtreeRef[index].support >= minSupport)
num = countNumFreqSets(index,
startDIC_TtreeRef[index].childRef,num+1);
}
}
// Return
return(num);
}
/** Counts the number of supported nodes in a sub branch of the DIC T-tree.
@param size the length/size of the current array level in the DIC T-tree.
@param linkRef the reference to the current array level in the DIC T-tree.
@param num the number of frequent sets sofar. */
protected int countNumFreqSets(int size, DIC_TtreeNode[] linkRef, int num) {
if (linkRef == null) return(num);
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport)
num = countNumFreqSets(index,
linkRef[index].childRef,num+1);
}
}
// Return
return(num);
}
/* --------------------------- */
/* 4. OUTPUT NUMBER OF UPDATES */
/* --------------------------- */
/* OUTPUT NUMBER UPDATES */
/** Outputs the number of update and number of nodes created during the
generation of the T-tree (the later is not the same as the number of
supported nodes). */
public void outputNumUpdates() {
System.out.println("Number of DIC T-tree nodes created = " +
DIC_TtreeNode.getNumberOfNodes());
System.out.println("Number of DIC T-tree Updates = " + numUpdates);
}
/* --------------------------- */
/* 5. OUTPUT T-TREE STATISTICS */
/* --------------------------- */
/** Commences the process of outputting T-tree statistics (for diagnostic
purposes): (a) Storage, (b) Number of nodes on P-tree, (c) number of
partial support increments (updates) and (d) generation time. */
public void outputTtreeStats() {
if (textArea==null) {
System.out.println("DIC T-TREE STATISTICS\n---------------------");
System.out.println(numLevelsInTtree + " Levels in DIC T-tree");
System.out.println(DIC_TtreeNode.getNumberOfNodes() +
" Total # nodes created");
System.out.println(numUpdates + " Total # support value " +
"increments");
System.out.println(countNumFreqSets() + " Final # frequent sets");
System.out.println(calculateStorage() + " Total storage (Bytes)" +
" on completion");
System.out.println("-----------------------------------");
}
else outputTtreeStats(textArea);
}
/** Commences the process of outputting T-tree statistics (GUI version).
@param textArea the given instance of the JTextArea class. */
public void outputTtreeStats(JTextArea textArea) {
textArea.append("DIC T-TREE STATISTICS\n---------------------\n");
textArea.append(numLevelsInTtree + " Levels in DIC T-tree\n");
textArea.append(DIC_TtreeNode.getNumberOfNodes() + " Total # nodes " +
"created\n");
textArea.append(numUpdates + " Total # support value increments\n");
textArea.append(countNumFreqSets() + " Final # frequent sets\n");
textArea.append(calculateStorage() + " Total storage (Bytes)" +
" on completion\n");
textArea.append("-----------------------------------\n");
}
/* ----------------- */
/* 6. OUTPUT STORAGE */
/* ----------------- */
/** Commences the process of determining and outputting the storage
requirements (in bytes) for the T-tree. */
public void outputStorage() {
// If empty tree do nothing
if (startDIC_TtreeRef == null) return;
/* Otherwise calculate storage */
String s = "T-tree Storage = " + calculateStorage() +
" (Bytes)\n";
if (textArea==null) System.out.print(s);
else textArea.append(s);
}
/* CALCULATE STORAGE */
/** Commences process of calculating storage requirements for T-tree. */
protected int calculateStorage() {
// If emtpy tree (i.e. no supported sets) return 0
if (startDIC_TtreeRef == null) return(0);
/* Step through top level */
int storage = 4; // For element 0
for (int index=1; index <= numOneItemSets; index++) {
if (startDIC_TtreeRef[index] !=null) storage = storage + 18 +
calculateStorage(0,startDIC_TtreeRef[index].childRef);
else storage = storage+4;
}
// Return
return(storage);
}
/** Calculate storage requirements for a sub-branch of the DIC T-tree.
@param localStorage the storage as calculated sofar (set to 0 at start).
@param linkRef the reference to the current sub-branch of the DIC
T-tree. */
private int calculateStorage(int localStorage, DIC_TtreeNode[] linkRef) {
if (linkRef == null) return(0);
// Loop through current level in current sub-nranch of DIC T-tree
for (int index=1; index < linkRef.length; index++) {
if (linkRef[index] !=null) localStorage = localStorage + 18 +
calculateStorage(0,linkRef[index].childRef);
else localStorage = localStorage + 4;
}
/* Return */
return(localStorage+4); // For element 0
}
}