/* -------------------------------------------------------------------------- */ /* */ /* ASSOCIATION RULE DATA MINING */ /* */ /* Frans Coenen */ /* */ /* Monday 15 June 2003 */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ import java.io.*; import java.util.*; import java.awt.*; import java.awt.event.*; import javax.swing.*; // Other packages public class AprioriTgui extends JFrame implements ActionListener{ /* ------ FIELDS ------ */ // GUI features private BufferedReader fileInput; private JTextArea textArea; private JButton openButton, minSupport, runButton; /** Ttree node structure.
Arrays of these structures are used to store
nodes at the same level in any sub-branch of the T-tree. */
protected class TtreeNode {
/** The support associate wuth the itemset represented by the node. */
protected int support = 0;
/** A reference variable to the child (if any) of the node. */
protected TtreeNode[] childRef = null;
/** Default constructor */
protected TtreeNode() {
}
/** One argument constructor.
@param sup the support value to be included in the structure. */
private TtreeNode(int sup) {
support = sup;
}
}
// Data structures
/** The reference to start of t-tree. */
private TtreeNode[] startTtreeRef;
/** 2-D aray to hold input data from data file */
private short[][] dataArray = null;
// Constants
/** Minimum support value */
private static final double MIN_SUPPORT = 0.0;
/** Maximum support value */
private static final double MAX_SUPPORT = 100.0;
// Flags
/** Input format PK flag */
private boolean inputFormatOkFlag = true;
/** Flag to indicate whether system has data or not. */
private boolean haveDataFlag = false;
/** Flag to indicate whether system has support value or not. */
private boolean hasSupportFlag = false;
/** The next level indicator flag: set to true if new level generated
and by default. */
private boolean nextLevelExists = true ;
// Other fields
/** Data file name. */
private File fileName;
/** Number of rows. */
private int numRows = 0;
/** Number of Cols. */
private int numCols = 0;
/** Support. */
private double support = 20.0;
/** Minimum support in terms of rows. */
private double minSupportRows = 1.0;
public AprioriTgui(String s) {
super(s);
// Content pane
Container container = getContentPane();
container.setBackground(Color.pink);
container.setLayout(new BorderLayout(5,5)); // 5 pixel gaps
// Run button
runButton = new JButton("Run");
runButton.addActionListener(this);
runButton.setEnabled(false);
// Open button
openButton = new JButton("Open File");
openButton.addActionListener(this);
// Input Support
minSupport = new JButton("Add Min. Sup.");
minSupport.addActionListener(this);
// Button Panel
JPanel buttonPanel = new JPanel();
buttonPanel.setLayout(new GridLayout(1,3));
buttonPanel.add(openButton);
buttonPanel.add(minSupport);
buttonPanel.add(runButton);
container.add(buttonPanel,BorderLayout.NORTH);
// Text area
textArea = new JTextArea(40, 15);
textArea.setEditable(false);
container.add(new JScrollPane(textArea),BorderLayout.CENTER);
// Credits Panel
JPanel creditsPanel = new JPanel();
creditsPanel.setBackground(Color.pink);
creditsPanel.setLayout(new GridLayout(4,1));
Label creditLabel1 = new Label("LUCS-KDD (Liverpool University Computer " +
"Science - Knowledge Discovery");
Label creditLabel2 = new Label("in Data) group Apriori-T " +
"demonstrator.");
Label creditLabel3 = new Label(" ");
Label creditLabel4 = new Label("Created by Frans Coenen (17 June " +
"2003)");
creditsPanel.add(creditLabel1);
creditsPanel.add(creditLabel2);
creditsPanel.add(creditLabel3);
creditsPanel.add(creditLabel4);
container.add(creditsPanel,BorderLayout.SOUTH);
}
/* ACTION PERFORMED */
public void actionPerformed(ActionEvent event) {
if (event.getActionCommand().equals("Open File")) getFileName();
if (event.getActionCommand().equals("Read File")) readFile();
if (event.getActionCommand().equals("Add Min. Sup.")) addSupport();
if (event.getActionCommand().equals("Run")) aprioriT();
}
/* ---------------------------------------------------------------- */
/* */
/* APRIORI-T */
/* */
/* ---------------------------------------------------------------- */
private void aprioriT() {
textArea.append("Apriori-T (Minimum support threshold = " + support +
"%)\n-----------------------------------------\n" +
"Generating K=1 large itemsets\n");
// Determin mimimum support in terms of rows
minSupportRows = numRows*support/100.0;
// Create Top level of T-tree (First pass of dataset)
createTtreeTopLevel();
// Generate level 2
generateLevel2();
// Further passes of the dataset
createTtreeLevelN();
textArea.append("\n");
outputFrequentSets();
}
/* CREATE T-TREE TOP LEVEL */
/** Generates level 1 (top) of the T-tree. */
protected void createTtreeTopLevel() {
// Dimension and initialise top level of T-tree
startTtreeRef = new TtreeNode[numCols+1];
for (int index=1;index<=numCols;index++)
startTtreeRef[index] = new TtreeNode();
// Add support for each 1 itemset
createTtreeTopLevel2();
// Prune top level, setting any unsupport 1 itemsets to null
pruneLevelN(startTtreeRef,1);
}
/** Adds supports to level 1 (top) of the T-tree. */
protected void createTtreeTopLevel2() {
// Loop through data set record by record and add support for each
// 1 itemset
for (int index1=0;index1 The general
generateLevelN method assumes we have to first find the right
level in the T-tree, that is not necessary in this case of level 2. */
protected void generateLevel2() {
// Set next level flag
nextLevelExists=false;
// loop through top level
for (int index=2;index
Where all nodes are supported and we wish to add the third level we would
walk the tree and attempt to add new nodes to every level 2 node found.
Having found the correct level we step through starting from B (we cannot
add a node to A), so in this case there is only one node from which a level
3 node may be attached.
@param linkRef the reference to the current sub-branch of T-tree (start at
top of tree).
@param level the level marker, set to 1 at the start of the recursion and
incremented by 1 on each repetition.
@param requiredLevel the required level.
@param itemSet the current itemset under consideration. */
protected void generateLevelN(TtreeNode[] linkRef, int level,
int requiredLevel, short[] itemSet) {
int index1;
int localSize = linkRef.length;
// Correct level
if (level == requiredLevel) {
for (index1=2;index1
where we wish to add a level 3 node to node (B), i.e. the node {A}, we
would proceed as follows:
Example 2, given:
where we wish to add a level 4 node (A) to (B) this would represent the
complete label {D,C,B,A}, the N-1 subsets will then be {{D,C,B},{D,C,A},
{D,B,A} and {C,B,A}}. We know the first two are supported becuase they are
contained in the current sub-branch of the T-tree, {D,B,A} and {C,B,A} are
not.
@param parentRef the reference to the level in the sub-branch of the T-tree
under consideration.
@param endIndex the index of the current node under consideration.
@param itemSet the complete label represented by the current node (required
to generate further itemsets to be X-checked). */
protected void generateNextLevel(TtreeNode[] parentRef, int endIndex,
short[] itemSet) {
parentRef[endIndex].childRef = new TtreeNode[endIndex]; // New level
short[] newItemSet;
// Generate a level in Ttree
TtreeNode currentNode = parentRef[endIndex];
// Loop through parent sub-level of siblings upto current node
for (int index=1;index itemSet1 = first two items in current item set
itemSet2 = remainder of items in current item set
Example 1:
Example 2:
Operates in a recursive manner.
Example 1: Given --- sofarSet=null,
startIndex=0, endIndex=2, itemSet1 = {B,A} and itemSet2 = {C}
Example 2: Given --- sofarSet=null,
startIndex=0, endIndex=2, itemSet1 = {B,A} and itemSet2 = {C,D}
Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given T-tree level (set to 1 at
start).
@param lastIndex the end index of the current T-tree level.
@param linRef the reference to the current T-tree level.
@return returns true if itemset found and false otherwise. */
private boolean findItemSetInTtree2(short[] itemSet, int index,
int lastIndex, TtreeNode[] linkRef) {
// Element at "index" in item set exists in Ttree
if (linkRef[itemSet[index]] != null) {
// If element at "index" is last element of item set then item set
// found
if (index == lastIndex) return(true);
// Otherwise continue
else return(findItemSetInTtree2(itemSet,index+1,lastIndex,
linkRef[itemSet[index]].childRef));
}
// Item set not in Ttree
else return(false);
}
/* ---------------------------------------------------------------- */
/* */
/* GET MINIMUM SUPPORT VALUE */
/* */
/* ---------------------------------------------------------------- */
/* GET SUPPORT */
private void addSupport() {
try{
while (true) {
String stNum1 = JOptionPane.showInputDialog("Input minimum " +
" support value between " + MIN_SUPPORT + " and " +
MAX_SUPPORT);
if (stNum1.indexOf('.') > 0)
support = Double.parseDouble(stNum1);
else support = Integer.parseInt(stNum1);
if (support>=MIN_SUPPORT && support<=MAX_SUPPORT) break;
JOptionPane.showMessageDialog(null,
"MINIMUM SUPPORT VALUE INPUT ERROR:\n" +
"input = " + support +
"\nminimum support input must be a floating point\n" +
"number between " + MIN_SUPPORT + " and " +
MAX_SUPPORT);
}
textArea.append("Minimum support = " + support + "%\n");
hasSupportFlag=true;
}
catch(NumberFormatException e) {
hasSupportFlag=false;
runButton.setEnabled(false);
}
// Enable run button if have data and a minimum support value.
if (haveDataFlag && hasSupportFlag) runButton.setEnabled(true);
}
/* ---------------------------------------------------------------- */
/* */
/* OPEN NAME */
/* */
/* ---------------------------------------------------------------- */
/* OPEN THE FILE */
private void getFileName() {
// Display file dialog so user can select file to open
JFileChooser fileChooser = new JFileChooser();
fileChooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
int result = fileChooser.showOpenDialog(this);
// If cancel button selected return
if (result == JFileChooser.CANCEL_OPTION) return;
// Obtain selected file
fileName = fileChooser.getSelectedFile();
// Read file if readabale (i.e not a direcrory etc.).
if (checkFileName()) {
readFile();
}
// Check ordering
if (inputFormatOkFlag) {
if (checkOrdering()) {
// Enable run button if have data and a minimum support value.
if (haveDataFlag && hasSupportFlag) runButton.setEnabled(true);
// Output to text area
outputDataArray();
textArea.append("Number of records = " + numRows + "\n");
countNumCols();
textArea.append("Number of columns = " + numCols + "\n");
}
else {
haveDataFlag = false;
inputFormatOkFlag = true;
textArea.append("Error reading file: " + fileName + "\n\n");
runButton.setEnabled(false);
}
}
}
/* CHECK FILE NAME */
/* Return flase if selected file is a directory, access is denied or is
not a file name. */
private boolean checkFileName() {
if (fileName.exists()) {
if (fileName.canRead()) {
if (fileName.isFile()) return(true);
else JOptionPane.showMessageDialog(null,
"FILE ERROR: File is a directory");
}
else JOptionPane.showMessageDialog(null,
"FILE ERROR: Access denied");
}
else JOptionPane.showMessageDialog(null,
"FILE ERROR: No such file!");
// Return
return(false);
}
/* READ FILE */
private void readFile() {
try {
// Dimension data structure
inputFormatOkFlag=true;
getNumberOfLines();
if (inputFormatOkFlag) {
dataArray = new short[numRows][];
// Read file
inputDataSet();
// Set have data flag to true
haveDataFlag = true;
}
else {
haveDataFlag = false;
textArea.append("Error reading file: " + fileName + "\n\n");
runButton.setEnabled(false);
}
}
catch(IOException ioException) {
JOptionPane.showMessageDialog(this,"Error reading File",
"Error 5: ",JOptionPane.ERROR_MESSAGE);
closeFile();
System.exit(1);
}
}
/* GET NUMBER OF LINES */
/** Gets number of lines in file and prepares data structure. */
private void getNumberOfLines() throws IOException {
int counter = 0;
// Open the file
openFile();
// Loop through file incrementing counter
// get first row.
String line = fileInput.readLine();
while (line != null) {
checkLine(counter+1,line);
StringTokenizer dataLine = new StringTokenizer(line);
int numberOfTokens = dataLine.countTokens();
if (numberOfTokens == 0) break;
counter++;
line = fileInput.readLine();
}
numRows = counter;
closeFile();
}
/* CHECK LINE */
/** Check whether input file is of appropriate numeric input. */
private void checkLine(int counter, String str) {
for (int index=0;index Operates in a recursive manner.
@param number the number of frequent sets so far.
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array lavel in the T-tree.
@param linkRef the reference to the current array lavel in the T-tree.
@return the incremented (possibly) number the number of frequent sets so
far. */
private int outputFrequentSets(int number, short[] itemSetSofar, int size,
TtreeNode[] linkRef) {
// No more nodes
if (linkRef == null) return(number);
// Otherwise process
for (int index=1; index < size; index++) {
if (linkRef[index] != null) {
if (linkRef[index].support >= minSupportRows) {
short[] newItemSetSofar = realloc2(itemSetSofar,
(short) index);
textArea.append("[" + number + "] ");
outputItemSet(newItemSetSofar);
textArea.append(" = " + linkRef[index].support + "\n");
number = outputFrequentSets(number + 1,newItemSetSofar,
index,linkRef[index].childRef);
}
}
}
// Return
return(number);
}
/* ------------------------------------------------------- */
/* */
/* FILE HANDLING UTILITIES */
/* */
/* ------------------------------------------------------- */
/* OPEN FILE */
private void openFile() {
try {
// Open file
FileReader file = new FileReader(fileName);
fileInput = new BufferedReader(file);
}
catch(IOException ioException) {
JOptionPane.showMessageDialog(this,"Error Opening File",
"Error 4: ",JOptionPane.ERROR_MESSAGE);
}
}
/* CLOSE FILE */
private void closeFile() {
if (fileInput != null) {
try {
fileInput.close();
}
catch (IOException ioException) {
JOptionPane.showMessageDialog(this,"Error Opening File",
"Error 4: ",JOptionPane.ERROR_MESSAGE);
}
}
}
/* ------------------------------------------------------- */
/* */
/* ARM UTILITIES */
/* */
/* ------------------------------------------------------- */
/* REALLOC 1 */
/** Resizes given item set so that its length is increased by one
and append new element
@param oldItemSet the original item set
@param newElement the new element/attribute to be appended
@return the combined item set */
protected short[] realloc1(short[] oldItemSet, short newElement) {
// No old item set
if (oldItemSet == null) {
short[] newItemSet = {newElement};
return(newItemSet);
}
// Otherwise create new item set with length one greater than old
// item set
int oldItemSetLength = oldItemSet.length;
short[] newItemSet = new short[oldItemSetLength+1];
// Loop
int index;
for (index=0;index < oldItemSetLength;index++)
newItemSet[index] = oldItemSet[index];
newItemSet[index] = newElement;
// Return new item set
return(newItemSet);
}
/* APPEND */
/** Concatinates two itemSets --- resizes given array so that its
length is increased by size of second array and second array added.
@param itemSet1 The first item set.
@param itemSet2 The item set to be appended.
@return the combined item set */
protected short[] append(short[] itemSet1, short[] itemSet2) {
// Test for emty sets, if found return other
if (itemSet1 == null) return(copyItemSet(itemSet2));
else if (itemSet2 == null) return(copyItemSet(itemSet1));
// Create new array
short[] newItemSet = new short[itemSet1.length+itemSet2.length];
// Loop through itemSet 1
int index1;
for(index1=0;index1
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
(A) ----- (D)
|
|
(A) ----- (B) ----- (C)
|
|
(A) ----- (B)
currentItemSet = {A,B,C}
itemSet1 = {B,A} (change of ordering)
size = {A,B,C}-2 = 1
itemSet2 = {C} (currentItemSet with first two elements removed)
calculate combinations between {B,A} and {C}
currentItemSet = {A,B,C,D}
itemSet1 = {B,A} (change of ordering)
itemSet2 = {C,D} (currentItemSet with first two elements removed)
calculate combinations between {B,A} and {C,D}
@param currentItemSet the given itemset. */
protected boolean testCombinations(short[] currentItemSet) {
if (currentItemSet.length < 3) return(true);
// Creat itemSet1 (note ordering)
short[] itemSet1 = new short[2];
itemSet1[0] = currentItemSet[1];
itemSet1[1] = currentItemSet[0];
// Creat itemSet2
int size = currentItemSet.length-2;
short[] itemSet2 = removeFirstNelements(currentItemSet,2);
// Calculate combinations
return(combinations(null,0,2,itemSet1,itemSet2));
}
/* COMBINATIONS */
/** Determines the cardinality N combinations of a given itemset and then
checks whether those combinations are supported in the T-tree.
itemSet2.length = 1
endIndex = 2 greater than itemSet2.length if condition succeeds
tesSet = null+{B,A} = {B,A}
retutn true if {B,A} supported and null otherwise
endindex not greater than length {C,D}
go into loop
tempSet = {} + {C} = {C}
combinations with --- sofarSet={C}, startIndex=1,
endIndex=3, itemSet1 = {B,A} and itemSet2 = {C}
endIndex greater than length {C,D}
testSet = {C} + {B,A} = {C,B,A}
tempSet = {} + {D} = {D}
combinations with --- sofarSet={D}, startIndex=1,
endIndex=3, itemSet1 = {B,A} and itemSet2 = {C}
endIndex greater than length {C,D}
testSet = {D} + {B,A} = {D,B,A}
@param sofarSet The combination itemset generated so far (set to null at
start)
@param startIndex the current index in the given itemSet2 (set to 0 at
start).
@param endIndex The current index of the given itemset (set to 2 at start)
and incremented on each recursion until it is greater than the length of
itemset2.
@param itemSet1 The first two elements (reversed) of the totla label for the
current item set.
@param itemSet2 The remainder of the current item set.
*/
private boolean combinations(short[] sofarSet, int startIndex,
int endIndex, short[] itemSet1, short[] itemSet2) {
// At level
if (endIndex > itemSet2.length) {
short[] testSet = append(sofarSet,itemSet1);
// If testSet exists in the T-tree sofar then it is supported
return(findItemSetInTtree(testSet));
}
// Otherwise
else {
short[] tempSet;
for (int index=startIndex;index