/* -------------------------------------------------------------------------- */ /* */ /* TOTAL SUPPORT TREE NEGATIVE BORDER */ /* */ /* Frans Coenen */ /* */ /* 10 January 2003 */ /* (Revisions 16/1/2003, 7/2/2003, 14/3/2003, 6/6/2003, 1/7/2006) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class Hierarchy: AssocRuleMining | +-- TotalSupportTree | +-- TotalSupportTreeNegBorder */ //package lucsKDD_ARM; /* Java packages */ import java.io.*; // Java GUI packages import javax.swing.*; /** Methods to implement the "Apriori-T" algorithm using a "Total support" tree data structure (T-tree) without X-checking and with the negative boarder concept such that the leaf nodes represent itemset that are just NOT supported.
In the "standard" T-tree the leaf nodes represent those itemsets which are just supported. Methods in this class are used to implement Toivonen's sampling ARM approach and distributed Apriori-T algorithms. @author Frans Coenen @version 5 July 2003 */ public class TotalSupportTreeNegBorder extends TotalSupportTree { /* ------ FIELDS ------ */ // CONSTANTS /** The start row identifier for a segement. */ protected static int START_SEGMENT = 0; // VARIABLE FIELDS /** Value by which minimum support threshold should be decreased to ensure that no frequent set are omitted. */ protected double supportReductionValue = 0.1; // Percent /** The global minsupport value held while some alternative is temporarily used in connection with the "negative boarder" approach. */ protected double tempMinSupport; /** Percentage of record to be included in preliminary segment. */ private static double PRELIMINARY_SIZE_PRECENTAGE = 0.25; // DATA STRUCTURES /** 2-D array to hold preliminary segement of input data. */ protected short[][] prelimSegDataArray = null; // FLAGS /** Temporary minimum support threshold reduced by one record (used for output only). */ private boolean reducedByOneFlag = false; /** Temporary minimum support threshold set to one record (used for output only). */ private boolean setToOneFlag = false; /* ------ CONSTRUCTORS ------ */ /** Processes command line arguments. */ public TotalSupportTreeNegBorder(String[] args) { super(args); } /** With argument from existing instance of class AssocRuleMining. */ public TotalSupportTreeNegBorder(AssocRuleMining armInstance) { super(armInstance); } /** Default constructor. */ public TotalSupportTreeNegBorder() { } /* ------ METHODS ------ */ /*----------------------------------------------------------------------- */ /* */ /* TREE BUILDING METHODS */ /* (Negative boarder applied to entire tree) */ /* WARNINMG: THIS IS NOT USUALLY DONE SO */ /* PROBABLY SAFE TO IGNRE THIS BIT OF CODING! */ /* */ /*----------------------------------------------------------------------- */ /* CREATE TOTAL SUPPORT TREE */ /** Commences start process of generating a total support tree (T-tree) with negative boarder concept applied to the entire tree.
Overridies parent method. */ public void createTotalSupportTree() { System.out.println("APRIORI-T WITH NEGATIVE BOARDER\n" + "-------------------------------"); System.out.println("Negative boarder concept applied to entire tree."); System.out.println("Minimum support threshold = " + support + "% " + "(" + minSupport + " records)"); // Recalculate minimum support so that it is temporarily lowered to // provide a "safety margin" when using the negative boarder principal double time1 = (double) System.currentTimeMillis(); calcReducedMinSup(); outputMinSupReduction(); // Continue createTotalSupportTree2(); duration = getDuration(time1,(double) System.currentTimeMillis()); } /** Commences start process of generating a total support tree (T-tree) using negative boarder concept applied to the entire tree, GUI version.
Overridies parent method.
@param textArea the given instance of the JTextArea class. */
public void createTotalSupportTree(JTextArea textArea) {
textArea.append("Negative boarder concept applied to entire tree.\n");
textArea.append("Minimum support threshold = " + support + "% " +
"(" + minSupport + " records)\n");
// Recalculate minimum support so that it is temporarily lowered to
// provide a "safety margin" when using the negative boarder principal
double time1 = (double) System.currentTimeMillis();
calcReducedMinSup();
outputMinSupReduction(textArea);
// Continue
createTotalSupportTree2();
duration = getDuration(time1,(double) System.currentTimeMillis());
}
/** Continues start process of generating a total support tree (T-tree)
using negative boarder concept. */
private void createTotalSupportTree2() {
// Create Top level of T-tree (First pass of dataset)
createTtreeTopLevel();
// Generate level 2 (including negative boarder concept)
generateLevel2();
// Further passes of the dataset
createTtreeLevelN(); // Parent method
// Reset minimum support level
minSupport = tempMinSupport;
}
/*----------------------------------------------------------------------- */
/* */
/* TREE BUILDING METHODS */
/* (Negative boarder applied to segment of tree) */
/* */
/*----------------------------------------------------------------------- */
/* CREATE TOTAL SUPPORT TREE */
/** Commences process of generating a total support tree (T-tree) using
negative boarder concept for a given segment of the data. */
public void createTotalSupportTreeNB() {
System.out.println("Apriori-T with Negative Border\n" +
"------------------------------");
System.out.println("Minimum support threshold = " + support + "% " +
"(" + minSupport + " records)");
// Recast input data into a preliminary segment and the remainder.
pruneInputData();
System.out.println(prelimSegDataArray.length + " size of start " +
"segment (" + (PRELIMINARY_SIZE_PRECENTAGE*100) +
"% of total # records)");
// Recalculate minimum support so that it takes into consideration
// the reduced number of records due to segmentation and so that it
// is temporarily lowered to provide a "safety margin" when using
// the negative boarder principal
calcReducedMinSup(START_SEGMENT,prelimSegDataArray.length);
outputMinSupReduction();
// Create initial negative boarder T-tree
createPreliminaryTtreeWithNB();
// Create Top level of T-tree (First pass of dataset)
addSupportToTtreeIfExists();
// Output
int numFreqSetsOnNB = getNumFreqSetsOnNB();
if (numFreqSetsOnNB>0) System.out.println("WARNING: ");
System.out.println("Number of frequent sets on negative boarder = " +
numFreqSetsOnNB + "\n");
}
/** Commences process of generating a total support tree (T-tree) using
negative boarder concept for a given segment of the data.
@param textArea the given instance of the JTextArea class. */
public void createTotalSupportTreeNB(JTextArea textArea) {
textArea.append("Minimum support threshold = " + support + "% " +
"(" + minSupport + " records)\n");
// Recast input data into a preliminary segment and the remainder.
pruneInputData();
textArea.append(prelimSegDataArray.length + " size of start " +
"segment (" + (PRELIMINARY_SIZE_PRECENTAGE*100) +
"% of total # records)\n");
// Recalculate minimum support so that it takes into consideration
// the reduced number of records due to segmentation and so that it is
// temporarily lowered to provide a "safety margin" when using the
// negative boarder principal
calcReducedMinSup(START_SEGMENT,prelimSegDataArray.length);
outputMinSupReduction(textArea);
// Create initial negatice boarder T-tree
createPreliminaryTtreeWithNB();
// Create Top level of T-tree (First pass of dataset)
addSupportToTtreeIfExists();
// Output
int numFreqSetsOnNB = getNumFreqSetsOnNB();
if (numFreqSetsOnNB>0) textArea.append("WARNING: ");
textArea.append("Number of frequent sets on negative boarder = " +
numFreqSetsOnNB + "\n");
}
/* CREATE PRELIMINARY TOTAL SUPPORT TREE */
/** Commences process of generating a preliminary total support tree
(T-tree) using negative boarder concept. */
private void createPreliminaryTtreeWithNB() {
short[][] tempDataArray = dataArray;
dataArray = prelimSegDataArray;
// Create Top level of T-tree (First pass of dataset)
createTtreeTopLevel();
// Generate level 2 (including negative boarder concept)
generateLevel2();
// Further passes of the dataset
createTtreeLevelN();
// Reset minimum support level
minSupport = tempMinSupport;
dataArray = tempDataArray;
}
/* ADD SUPPORT VALUES TO T-TREE IF NODE EXISTS */
/** Commences process of adding further support values to the nodes in an
existing negative boarder T-tree generated for a preliminary data segment. */
protected void addSupportToTtreeIfExists() {
// Loop through data set record by record
for (int index=0;index Operates in a recursive
manner.
@param linkRef The reference to the current sub-branch of T-tree (start at
top of tree). */
protected void pruneTtree(TtreeNode[] linkRef) {
// Check for empty branch
if (linkRef != null) {
// Loop through T-tree branch
for(int index=1;index Proceeds in a recursive manner level by level
until the required level is reached. Example, if we have a T-tree of the form:
Where all nodes are supported and we wish to add the third level we would
walk the tree and attempt to add new nodes to every level 2 node found.
Having found the correct level we step through starting from B (we cannot
add a node to A), so in this case there is only one node from which a level
3 node may be attached.
@param linkRef the reference to the current sub-branch of T-tree (start at
top of tree)
@param level the level marker, set to the required level at the start and
then decremented by 1 on each recursion.
@param itemSet the current itemset under consideration. */
protected void generateLevelN(TtreeNode[] linkRef, int level,
short[] itemSet) {
int localSize = linkRef.length;
// Correct level
if (level == 1) {
// Step through T-tree array in current branch at current level
for (int index=2;index Used when generating an entire T-tree with a negative
boarder. */
protected void calcReducedMinSup() {
reducedByOneFlag = false;
setToOneFlag = false;
// Store full minimum support
tempMinSupport = minSupport;
// If min support is already 1 it can not be reduced any further.
if (minSupport < 1.00000001) {
setToOneFlag = true;
return;
}
// Calculate reduced value
minSupport = numRows * (support-supportReductionValue/100.0)/100.0;
// Check if reduction is greater than one record, if not adjust new
// min support so that it is one less than the full minimum.
if ((tempMinSupport-minSupport) < 1.0) {
minSupport = tempMinSupport-1;
reducedByOneFlag = true;
}
// check that reduction is not less than one record, otherwise set to
// one
if (minSupport<1.0) {
minSupport=1.0;
reducedByOneFlag = false;
setToOneFlag = true;
}
}
/** Recalculates minimum support so that it is temporarily lowered, for a
given segment, by the value of the constant supportReductionValue
or 1 whatever is greater. Used when applying Toivonen's negative
boarder ARM method.
@param start the record number marking the start of the segment.
@param end the record number marking the end of the segment. */
protected void calcReducedMinSup(int start, int end) {
reducedByOneFlag = false;
setToOneFlag = false;
// Store full minimum support
tempMinSupport = minSupport;
// If min support is already 1 it can not be reduced any further.
if (minSupport < 1.00000001) {
setToOneFlag = true;
return;
}
// Calculate reduced value. Support is a percentage, reduction value is
// also a percentage. So if reduction value is 50% and support is 1%
// then support should be reduced to 0.5%. Min support is expressed in
// terms of a number of records.
int numRows = end-start+1;
minSupport = numRows * (support-supportReductionValue/100.0)/100.0;
// Check if reduction is greater than one record, if not adjust new
// min support so that it is one less than the full minimum.
if ((tempMinSupport-minSupport) < 1.0) {
minSupport = tempMinSupport-1;
reducedByOneFlag = true;
}
// It does not make sense to reduce the minimum support to less than
// one record, so check if min support is greater than one, if not
// adjust new min support so that it is set to one.
if (minSupport < 1.0) {
minSupport = 1.0;
reducedByOneFlag = false;
setToOneFlag = true;
}
}
/* PRUNE INPUT DATA */
/** Prunes input data set to give a preliminary set (used o determine T-tree
with negative boarder), and a revised data set with out the preliminary set
(used to determine additional support values). */
private void pruneInputData() {
// Dimension dataset: (i) Calculate size.
int sizeOfPrelimDataSet = 1 + (int) ((double) numRows*
PRELIMINARY_SIZE_PRECENTAGE);
prelimSegDataArray = new short[sizeOfPrelimDataSet][];
// Copy preliminary data.
int index1;
for (index1=0;index1
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)