/* -------------------------------------------------------------------------- */ /* */ /* F U Z Z Y A P R I O R I T */ /* */ /* Frans Coenen */ /* */ /* Tuesday 4 Match 2008 */ /* (Revised 22 August 2008) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /** Class that contains methods to support Fuzzy Association Rule Mining (FARM) based in the Apriori-T algorithm. */ /* Structure: AssocRuleMining | +-- TotalSupportTree | +-- FuzzyAprioriT */ //package lucsKDD_ARM; // Java packages import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; public class FuzzyAprioriT extends TotalSupportTree { /*------------------------------------------------------------------------*/ /* */ /* FIELDS */ /* */ /*------------------------------------------------------------------------*/ /** 2-D aray to hold input data from data file.
First index is row (record or
TID) number starting from 0, and second is attribute (column) number starting from
zero. */
protected FuzzyDataItem[][] dataArray = null;
/** Array used to renumber columns for input data in terms of frequency of single
attributes (reordering will enhance performance for some ARM algorithms). */
protected FuzzyDataItem[] conversionArray = null;
/** The reference to start of t-tree. */
protected FuzzyTtreeNode[] startTtreeRef;
/*---------------------------------------------------------------------*/
/* */
/* CONSTRUCTORS */
/* */
/*---------------------------------------------------------------------*/
/** With argument from existing instance of class AssocRuleMining.
@param newInstance the given instance of the AssocRuleMining
class. */
public FuzzyAprioriT(FuzzyAprioriT newInstance) {
super(newInstance);
dataArray = newInstance.dataArray;
}
/** Default constructor. */
public FuzzyAprioriT() {
}
/*-------------------------------------------------------------------*/
/* */
/* T-TREE BUILDING METHODS */
/* */
/*-------------------------------------------------------------------*/
/** Commences start process of generating a total support tree (T-tree): GUI
version. Min support is used in non-weighted Apriori-T and is the support in
terms of number of records, this has no meaning with respect to fuzzy
support as defined here. Min support therefore has the same value as the
support field.
@param textArea the given instance of the JTextArea class. */
public void createTotalSupportTree(JTextArea textArea) {
// Calculate support in terms of number of records
double tempSup = numRows*support;
textArea.append("Apriori-T with X-Cchecking\nMinimum support " +
"threshold = " + twoDecPlaces(support) + " (" +
twoDecPlaces(tempSup) + " records)\n");
// If no data (possibly as a result of an order and pruning operation)
// return
if (numOneItemSets==0) return;
// Initilise T-tree data structure and diagnostic counters. Set number
// of t-tree nodes to zero (this is a static field so will not be reset
// in repeat calls to the T-tree constructor).
startTtreeRef = null;
numFrequentSets = 0;
numUpdates = 0l;
TtreeNode.setNumberOfNodesFieldToZero();
// Continue
contCreateTtree(textArea);
// Potential output
if (outputTtreeStatsFlag) outputTtreeStats(textArea);
if (outputTtreeFlag) outputTtree(textArea);
//if (outputTtreeGraphFlag) drawTtreeGraph();
}
/* CREATE T-TREE TOP LEVEL */
/** Generates level 1 (top) of the T-tree. */
protected void createTtreeTopLevel() {
// Dimension and initialise top level of T-tree
startTtreeRef = new FuzzyTtreeNode[numOneItemSets+1];
for (int index=1;index<=numOneItemSets;index++)
startTtreeRef[index] = new FuzzyTtreeNode();
// Add support for each 1 itemset
createTtreeTopLevel2();
// Prune top level, setting any unsupported 1-itemsets to null
pruneLevelN(startTtreeRef,1);
}
/** Adds supports to level 1 (top) of the T-tree. */
protected void createTtreeTopLevel2() {
numLevelsInTtree = 1;
// Loop through data set record by record and add support for each
// 1 itemset
for (int index1=0;index1
Where all nodes are supported and we wish to add the third level we would
walk the tree and attempt to add new nodes to every level 2 node found.
Having found the correct level we step through starting from B (we cannot
add a node to A), so in this case there is only one node from which a level
3 node may be attached.
@param linkRef the reference to the current sub-branch of T-tree (start at
top of tree).
@param level the level marker, set to the required level at the start and
then decremented by 1 on each recursion.
@param itemSet the current itemset (T-tree node) under consideration. */
protected void generateLevelN(FuzzyTtreeNode[] linkRef, int level,
short[] itemSet) {
int localSize = linkRef.length;
// Correct level
if (level == 1) {
// Step through T-tree array in current branch at current level
for (int index=2;index
where we wish to add a level 3 node to node (B), i.e. the node {A}, we
would proceed as follows:
Example 2, given:
where we wish to add a level 4 node (A) to (B) this would represent the
complete label {D,C,B,A}, the N-1 subsets will then be {{D,C,B},{D,C,A},
{D,B,A} and {C,B,A}}. We know the first two are supported because they are
contained in the current sub-branch of the T-tree, {D,B,A} and {C,B,A} are
not.
@param parentRef the reference to the level in the sub-branch of the T-tree
under consideration.
@param endIndex the index of the current node under consideration.
@param itemSet the complete label represented by the current node (required
to generate further itemsets to be X-checked). */
protected void generateNextLevel(FuzzyTtreeNode[] parentRef, int endIndex,
short[] itemSet) {
parentRef[endIndex].childRef = new FuzzyTtreeNode[endIndex]; // New level
short[] newItemSet;
// Generate a level in Ttree
FuzzyTtreeNode currentNode = parentRef[endIndex];
// Loop through parent sub-level of siblings upto current node
for (int index=1;index Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given itemset.
@param linRef the reference to the current Fuzzy T-tree level.
@return returns the support value (0 if not found). */
private double getSupForIsetInTtree2(short[] itemSet, int index,
FuzzyTtreeNode[] linkRef) {
// Element at "index" in item set exists in Ttree
if (linkRef[itemSet[index]] != null) {
// If "current index" is 0, then this is the last element of the
// item set and therefore item set found
if (index == 0) return(linkRef[itemSet[0]].support);
// Otherwise continue provided there is a child branch to follow
else if (linkRef[itemSet[index]].childRef != null)
return(getSupForIsetInTtree2(itemSet,index-1,
linkRef[itemSet[index]].childRef));
else return(0);
}
// Item set not in Ttree therefore return 0
else return(0);
}
/* FIND ITEM SET IN T-TREE */
/** Commences process of determining if an itemset exists in a T-tree.
Used to X-check existence of Ttree nodes when generating new levels of the
Tree. Note that T-tree node labels are stored in "reverse", e.g. {3,2,1}.
@param itemSet the given itemset (IN REVERSE ORDER).
@return returns true if itemset found and false otherwise. */
protected boolean findItemSetInTtree(short[] itemSet) {
// first element of itemset in Ttree (Note: Ttree itemsets stored in
// reverse)
if (startTtreeRef[itemSet[0]] != null) {
int lastIndex = itemSet.length-1;
// If "current index" is 0, then this is the last element (i.e the
// input is a 1 itemset) and therefore item set found
if (lastIndex == 0) return(true);
// Otherwise continue down branch
else if (startTtreeRef[itemSet[0]].childRef!=null) {
return(findItemSetInTtree2(itemSet,1,lastIndex,
startTtreeRef[itemSet[0]].childRef));
}
else return(false);
}
// Item set not in Ttree
else return(false);
}
/*-------------------------------------------------------------------*/
/* */
/* INPUT FILE HANDLING METHODS */
/* */
/*-------------------------------------------------------------------*/
/* INPUT DATA SET */
/** Commences process of getting input data.
@param textArea the text area in the GUI used for output.
@param fName the name of the input file to be read. */
public void inputDataSet(JTextArea textArea, File fName) {
// Set filePath instance field
filePath = fName;
// Read the file (method below overwrites metjhod in Association Rules
// Class
readFile(textArea);
// Once input file has been read, processed and stored in data array
// count number of columns. Input format flag set during readFile process.
if (inputFormatOkFlag) {
countNumCols();
textArea.append("Number of columns = " + numCols + "\n");
// Set have data flag to true
haveDataFlag = true;
}
else {
textArea.append("Operation failed!\n");
dataArray=null;
}
}
/* COUNT NUMBER OF COLUMNS */
/** Counts number of columns represented by input data. */
protected void countNumCols() {
short maxAttribute=0;
// Loop through data array
for(int index=0;index Proceeds as follows:
If either the output schema or the input
file has nor been loaded then method will return false.
@return true if number of attributes are the same and false otherwise. */
public boolean checkSchemaVdata() {
boolean schemaAndDataAttsSame = true;
// Check schema
if (outputSchema==null) {
JOptionPane.showMessageDialog(null,"No output schema file.",
"CHECK SCHEMA v DATA ATTRIBUTES ERROR",
JOptionPane.ERROR_MESSAGE);
return(!schemaAndDataAttsSame);
}
// Check data array
if (dataArray==null) {
JOptionPane.showMessageDialog(null,"No input data file.",
"CHECK SCHEMA v DATA ATTRIBUTES ERROR",
JOptionPane.ERROR_MESSAGE);
return(!schemaAndDataAttsSame);
}
// Check lengths.
if (outputSchema.length==numCols) return(schemaAndDataAttsSame);
else {
JOptionPane.showMessageDialog(null,"Number of attributes in " +
"schema file (" + outputSchema.length + ") not\n" +
"same as number of attributes in data file (" +
numCols + ")\n","CHECK SCHEMA v DATA ATTRIBUTES ERROR",
JOptionPane.ERROR_MESSAGE);
return(!schemaAndDataAttsSame);
}
}
/*----------------------------------------------------------------------- */
/* */
/* ASSOCIATION RULE (AR) GENERATION */
/* */
/*----------------------------------------------------------------------- */
/** Loops through top level of T-tree as part of the AR generation
process. */
protected void generateARs2() {
//System.out.println("GenerateARs2: confidence = " + confidence);
// Loop
for (short index=1;index<=numOneItemSets;index++) {
if (startTtreeRef[index]!=null) {
if (startTtreeRef[index].support >= minSupport) {
short[] itemSetSoFar = new short[1];
itemSetSoFar[0] = index;
generateARs(itemSetSoFar,index,
startTtreeRef[index].childRef);
}
}
}
}
/* GENERATE ASSOCIATION RULES */
/** Continues process of generating association rules from a T-tree by
recursively looping through T-tree level by level.
@param itemSetSofar the label for a T-tree node as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree. */
protected void generateARs(short[] itemSetSofar, int size,
FuzzyTtreeNode[] linkRef) {
// If no more nodes return
if (linkRef == null) return;
// Otherwise process
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
// Temp itemset
short[] tempItemSet = realloc2(itemSetSofar,(short) index);
// Generate ARs for current large itemset
generateARsFromItemset(tempItemSet,linkRef[index].support);
// Continue generation process
generateARs(tempItemSet,index,linkRef[index].childRef);
}
}
}
}
/* GENERATE ASSOCIATION RULES */
/** Generates all association rules for a given large item set found in a
T-tree structure. Called from generateARs method.
@param itemSet the given large itemset.
@param support the associated support value for the given large itemset. */
protected void generateARsFromItemset(short[] itemSet, double support) {
// Determine combinations
short[][] combinations = combinations(itemSet);
// Loop through combinations
for(int index=0;index
Example, given the data set:
Operates in a recursive
manner.
@param textArea the given instance of the JTextArea class.
@param number the ID number of a particular node.
@param itemSetSofar the label for a T-tree node as generated sofar.
@param linkRef the reference to the current array level in the T-tree. */
private void outputTtree(JTextArea textArea, String number,
short[] itemSetSofar, FuzzyTtreeNode[] linkRef) {
// Set output local variables.
int num=1;
number = number + ".";
// Check for empty branch/sub-branch.
if (linkRef == null) return;
// Loop through current level of branch/sub-branch.
for (short index=1;index Operates in a
recursive manner.
@param textArea the given instance of the JTextArea class.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSets(JTextArea textArea, int number,
short[] itemSetSofar, int size, FuzzyTtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
for (short index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
short[] newItemSet = realloc2(itemSetSofar,index);
textArea.append("[" + number + "]");
outputItemSet(textArea,newItemSet);
textArea.append("= " + (twoDecPlaces(linkRef[index].support)) + "\n");
number = outputFrequentSets(textArea,number + 1,newItemSet,
index,linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/** Commences the process of outputting the frequent sets contained in
the T-tree as series of schema labels (GUI version).
@param textArea the given instance of the JTextArea class. */
public void outputFrequentSetsSchema(JTextArea textArea) {
// Check that we have a schema
if (!hasOutputSchemaFlag) {
textArea.append("No schema found!\nOperation failed\n");
return;
}
// Proceed
int number = 1;
textArea.append("Format: [N] {I} = S, where N is a sequential " +
"number, I is the item set and S the support.\n");
// Loop
for (short index=1; index <= numOneItemSets; index++) {
if (startTtreeRef[index] !=null) {
if (startTtreeRef[index].support >= minSupport) {
short[] itemSetSofar = new short[1];
itemSetSofar[0] = index;
textArea.append("[" + number + "]");
outputItemSetSchema(textArea,itemSetSofar);
textArea.append("= " + (twoDecPlaces(startTtreeRef[index].support)) + "\n");
number = outputFrequentSetsSchema(textArea,number+1,
itemSetSofar,index,startTtreeRef[index].childRef);
}
}
}
// End
System.out.println("\n");
}
/** Outputs T-tree frequent sets (GUI version). Operates in a
recursive manner.
@param textArea the given instance of the JTextArea class.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSetsSchema(JTextArea textArea, int number,
short[] itemSetSofar, int size, FuzzyTtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
for (short index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupport) {
short[] newItemSet = realloc2(itemSetSofar,index);
textArea.append("[" + number + "]");
outputItemSetSchema(textArea,newItemSet);
textArea.append("= " + (twoDecPlaces(linkRef[index].support)) + "\n");
number = outputFrequentSetsSchema(textArea,number + 1,
newItemSet,index,linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/* ------------------------ */
/* OUTPUT CONVERSION ARRAYS */
/* ------------------------ */
/** Outputs conversion array (used to renumber columns for input data
in terms of frequency of single attributes --- reordering will enhance
performance for some ARM algorithms). */
public void outputConversionArrays() {
// Conversion array
System.out.println("CONVERSION ARRAY\nindex = old attribute number\n" +
"
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
(A) ----- (D)
|
|
(A) ----- (B) ----- (C)
|
|
(A) ----- (B)
@param textArea the text area in the gui used for output. */
public void readFile(JTextArea textArea) {
try {
// Assume format OK by default
inputFormatOkFlag = true;
// Dimension data structure. Get number of lines method is in
// AssociationRules class, this calls the check line method below
// which sets the input format OK flag.
numRows = getNumberOfLines(fileName);
textArea.append("Number of records = " + numRows + "\n");
// If OK dimension and populate data array
if (inputFormatOkFlag) {
dataArray = new FuzzyDataItem[numRows][];
// Read file and process input line methods below.
textArea.append("Reading input file:\n" + filePath + "\n");
readInputDataSet();
}
// Else error
else {
haveDataFlag = false;
textArea.append("Error reading file:\n" + filePath + "\n\n");
}
}
catch(IOException ioException) {
JOptionPane.showMessageDialog(this,"Error reading File",
"Error: ",JOptionPane.ERROR_MESSAGE);
textArea.append("Error reading File\n");
closeFile();
// Set have data flag to false
haveDataFlag = false;
}
}
/* CHECK LINE */
/** Check whether given line from input file is of appropriate format
(space separated integers or attribute number and weighting tuples), if
incorrectly formatted line found inputFormatOkFlag set to
false.
@param counter the line number in the input file.
@param str the current line from the input file. */
protected void checkLine(int counter, String str) {
for (int index=0;index
1 2 5
1 2 3
2 4 5
1 2 5
2 3 5
This would produce a countArray (ignore index 0 because there is no
attributr number 0):
+---+---+---+---+---+---+
| | 1 | 2 | 3 | 4 | 5 |
+---+---+---+---+---+---+
| | 3 | 5 | 2 | 1 | 4 |
+---+---+---+---+---+---+
Which sorts to:
+---+---+---+---+---+---+
| | 2 | 5 | 1 | 3 | 4 |
+---+---+---+---+---+---+
| | 5 | 4 | 3 | 2 | 1 |
+---+---+---+---+---+---+
Giving rise to the conversion Array of the form (no index 0):
+---+---+---+---+---+---+
| | 3 | 1 | 4 | 5 | 2 |
+---+---+---+---+---+---+
| | 3 | 5 | 2 | 1 | 4 |
+---+---+---+---+---+---+
Note that the first row gives the new attribute number (old attribute
number is the index). The second row here are the counts used to identify
the ordering but which now no longer play a role in the conversion
exercise. Thus the new column (attriburte) number for column/attribute 1 is
column 3 (i.e. the first vale at index 1). The reconversion array will be
of the form (values are the indexes from the conversion array while indexes
represent the first vlaue from the conversion array):
+---+---+---+---+---+---+
| | 2 | 5 | 1 | 3 | 4 |
+---+---+---+---+---+---+
For example to convert the attribute number 3 back to its original number
we look up the value at index 3.
*/
public void idInputDataOrdering() {
// Count singles and store in countArray;
FuzzyDataItem[] countArray = countOneItemSets();
// Bubble sort count array on support value (second index)
orderCountArray(countArray);
//for (int index=1;index