/* -------------------------------------------------------------------------- */
/* */
/* ASSOCIATION RULE DATA MINING */
/* */
/* Frans Coenen */
/* */
/* Wednesday 9 January 2003 */
/* (revised 21/1/2003, 14/2/2003, 2/5/2003, 2/7/2003, 3/2/2004) */
/* */
/* Department of Computer Science */
/* The University of Liverpool */
/* */
/* -------------------------------------------------------------------------- */
// Java packages
import java.io.*;
import java.util.*;
// Java GUI packages
import javax.swing.*;
/** Set of utilities to support various Association Rule Mining (ARM)
algorithms included in the LUCS-KDD suite of ARM programs.
@author Frans Coenen
@version 2 July 2003 */
public class AssocRuleMining extends JFrame {
/* ------ FIELDS ------ */
// Data structures
/** 2-D array to hold input data from the data file. Allocated in
readFile() with one (initially unsized) row per line counted in the
input file. */
protected short[][] dataArray = null;
// Command line arguments with default values and associated fields
/** Command line argument for data file name. */
protected String fileName = null;
/** Number of classes in input data set (input by the user). */
protected int numClasses = 0;
// Flags
/** Error flag used when checking command line arguments (default =
true). Note the polarity: true means "no error so far"; checkFileName()
sets it to false when no file name has been supplied. */
protected boolean errorFlag = true;
/** Input format OK flag (default = true). Reset to true at the start of
readFile() and tested there once the input lines have been counted. */
protected boolean inputFormatOkFlag = true;
// Other fields
/** Number of columns; assigned by countNumCols(). */
protected int numCols = 0;
/** Number of rows; assigned by readFile() from the input file's line
count. */
protected int numRows = 0;
/** The number of one itemsets (singletons); countNumCols() defaults this
to numCols. */
protected int numOneItemSets = 0;
/** The input stream. Assigned by openFileName()/openFilePath() and closed
by closeFile(). */
protected BufferedReader fileInput;
/** The file path. When non-null, getNumberOfLines() opens the input via
this path rather than via the file name it is passed. */
protected File filePath = null;
/* ------ CONSTRUCTORS ------ */
/** Processes command line arguments */
public AssocRuleMining(String[] args) {
// Process command line arguments
for(int index=0;indexerrorFlag set
to false. */
protected void checkFileName() {
    // A file name has been supplied: nothing to report.
    if (fileName != null) {
        return;
    }
    // No file name given: tell the user and clear errorFlag
    // (for this flag, false is the value that signals an error).
    System.out.println("INPUT ERROR: Must specify file name (-F)");
    errorFlag = false;
}
/* ---------------------------------------------------------------- */
/* */
/* READ INPUT DATA FROM FILE */
/* */
/* ---------------------------------------------------------------- */
/* READ FILE */
/** Loads the input data set named by the fileName field (a GUI version
also exists). Steps:
- Count the lines in the file via getNumberOfLines(), which also passes
  every line through the format check.
- If inputFormatOkFlag is still true afterwards, allocate one row per
  line in dataArray and read the data in; otherwise print an error and
  read nothing.
An IOException at any point is reported on standard output, the file is
closed and the program exits with status 1.
*/
protected void readFile() {
    try {
        // Assume the format is good until the line check says otherwise
        inputFormatOkFlag = true;
        numRows = getNumberOfLines(fileName);
        // Badly formatted input: report it and leave the data array alone
        if (!inputFormatOkFlag) {
            System.out.println("Error reading file: " + fileName + "\n");
            return;
        }
        // Dimension the data structure, then fill it from the file
        dataArray = new short[numRows][];
        System.out.println("Reading input file: " + fileName);
        readInputDataSet();
    }
    catch (IOException readFailure) {
        System.out.println("Error reading File");
        closeFile();
        System.exit(1);
    }
}
/* GET NUMBER OF LINES */
/** Gets number of lines/records in input file and checks format of each
line. Counting stops at end of file or at the first line that contains no
tokens (that line is still passed to checkLine but is not counted). The
file is closed again before this method returns or throws.
@param nameOfFile the filename of the file to be opened; used only when
the filePath field is null, otherwise filePath is opened instead.
@return the number of rows in the given file.
@throws IOException if the file could not be opened or a line could not
be read. */
protected int getNumberOfLines(String nameOfFile) throws IOException {
    int counter = 0;
    // Drop any reader left over from an earlier call. The open helpers
    // report failure only through a dialog and leave fileInput untouched,
    // so starting from null is what makes a failed open detectable here.
    fileInput = null;
    if (filePath == null) openFileName(nameOfFile);
    else openFilePath();
    if (fileInput == null) {
        // Previously this fell through to a NullPointerException (or to
        // reading a stale reader) on the first readLine() call.
        throw new IOException("Unable to open input file: " +
                (filePath == null ? nameOfFile : filePath.getPath()));
    }
    try {
        // Loop through file incrementing counter
        String line = fileInput.readLine();
        while (line != null) {
            checkLine(counter + 1, line);
            // A line without tokens ends the data section
            if (new StringTokenizer(line).countTokens() == 0) break;
            counter++;
            line = fileInput.readLine();
        }
    }
    finally {
        // Release the reader even when readLine() throws
        closeFile();
    }
    return counter;
}
/* CHECK LINE */
/** Check whether given line from input file is of appropriate format
(space separated integers), if incorrectly formatted line found
inputFormatOkFlag set to false.
@param counter the line number in the input file.
@param str the current line from the input file. */
protected void checkLine(int counter, String str) {
for (int index=0;index = itemSet[index+1]) {
JOptionPane.showMessageDialog(null,"FILE FORMAT ERROR:\n" +
"Attribute data in line " + lineNum +
" not in numeric order");
return(false);
}
}
// Default return
return(true);
}
/* COUNT NUMBER OF COLUMNS */
/** Counts number of columns represented by input data. */
protected void countNumCols() {
int maxAttribute=0;
// Loop through data array
for(int index=0;index maxAttribute)
maxAttribute = dataArray[index][lastIndex];
}
numCols = maxAttribute;
numOneItemSets = numCols; // default value only
}
/* OPEN FILE NAME */
/** Opens the named file for buffered reading and stores the reader in the
fileInput field. If the file cannot be opened an error dialog is shown and
fileInput is left as it was.
@param nameOfFile the filename of the file to be opened. */
protected void openFileName(String nameOfFile) {
    try {
        // Create the file reader and its buffer in a single step
        fileInput = new BufferedReader(new FileReader(nameOfFile));
    }
    catch (IOException openFailure) {
        JOptionPane.showMessageDialog(
                this,
                "Error Opening File",
                "Error: ",
                JOptionPane.ERROR_MESSAGE);
    }
}
/* OPEN FILE PATH */
/** Opens the file referred to by the filePath field for buffered reading
and stores the reader in the fileInput field. If the file cannot be opened
an error dialog is shown and fileInput is left as it was. */
private void openFilePath() {
    try {
        // Create the file reader and its buffer in a single step
        fileInput = new BufferedReader(new FileReader(filePath));
    }
    catch (IOException openFailure) {
        JOptionPane.showMessageDialog(
                this,
                "Error Opening File",
                "Error: ",
                JOptionPane.ERROR_MESSAGE);
    }
}
/* CLOSE FILE */
/** Closes the reader held in the fileInput field, if there is one. A
failure while closing is reported in an error dialog rather than thrown. */
protected void closeFile() {
    // No reader has been assigned: nothing to close
    if (fileInput == null) {
        return;
    }
    try {
        fileInput.close();
    }
    catch (IOException closeFailure) {
        JOptionPane.showMessageDialog(
                this,
                "Error Closing File",
                "Error: ",
                JOptionPane.ERROR_MESSAGE);
    }
}
/* BINARY CONVERSION. */
/** Produce an item set (array of elements) from input line. Each of the
first numberOfTokens tokens is parsed as a short and handed, in order, to
realloc1 to grow the result (presumably by appending - see realloc1).
@param dataLine row from the input data file
@param numberOfTokens number of items in row
@return 1-D array of short integers representing attributes in input
row, or null when numberOfTokens is zero or negative
@throws NumberFormatException if a token is not a valid short
@throws NoSuchElementException if dataLine holds fewer than
numberOfTokens tokens */
protected short[] binConversion(StringTokenizer dataLine,
        int numberOfTokens) {
    short[] newItemSet = null;
    // Load array
    for (int tokenCounter = 0; tokenCounter < numberOfTokens; tokenCounter++) {
        // parseShort yields the primitive directly; the Short(String)
        // constructor used previously is deprecated and boxes needlessly
        short number = Short.parseShort(dataLine.nextToken());
        newItemSet = realloc1(newItemSet, number);
    }
    // Return itemSet
    return newItemSet;
}
/* ----------------------------------------------- */
/* */
/* ITEM SET INSERT AND ADD METHODS */
/* */
/* ----------------------------------------------- */
/* REALLOC INSERT */
/** Resizes given item set so that its length is increased by one
and new element inserted.
@param oldItemSet the original item set
@param newElement the new element/attribute to be inserted
@return the combined item set */
protected short[] reallocInsert(short[] oldItemSet, short newElement) {
// No old item set
if (oldItemSet == null) {
short[] newItemSet = {newElement};
return(newItemSet);
}
// Otherwise create new item set with length one greater than old
// item set
int oldItemSetLength = oldItemSet.length;
short[] newItemSet = new short[oldItemSetLength+1];
// Loop
int index1;
for (index1=0;index1 < oldItemSetLength;index1++) {
if (newElement < oldItemSet[index1]) {
newItemSet[index1] = newElement;
// Add rest
for(int index2 = index1+1;index2