/* -------------------------------------------------------------------------- */ /* */ /* ASSOCIATION RULE DATA MINING */ /* */ /* Frans Coenen */ /* */ /* Wednesday 9 January 2003 */ /* (revised 21/1/2003, 14/2/2003, 2/5/2003, 2/7/2003, 3/2/2004) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ // Java packages import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; /** Set of utilities to support various Association Rule Mining (ARM) algorithms included in the LUCS-KDD suite of ARM programs. @author Frans Coenen @version 2 July 2003 */ public class AssocRuleMining extends JFrame { /* ------ FIELDS ------ */ // Data structures /** 2-D aray to hold input data from data file */ protected short[][] dataArray = null; // Command line arguments with default values and associated fields /** Command line argument for data file name. */ protected String fileName = null; /** Number of classes in input data set (input by the user). */ protected int numClasses = 0; // Flags /** Error flag used when checking command line arguments (default = true). */ protected boolean errorFlag = true; /** Input format OK flag( default = true). */ protected boolean inputFormatOkFlag = true; // Other fields /** Number of columns. */ protected int numCols = 0; /** Number of rows. */ protected int numRows = 0; /** The number of one itemsets (singletons). */ protected int numOneItemSets = 0; /** The input stream. */ protected BufferedReader fileInput; /** The file path */ protected File filePath = null; /* ------ CONSTRUCTORS ------ */ /** Processes command line arguments */ public AssocRuleMining(String[] args) { // Process command line arguments for(int index=0;indexerrorFlag set to false. 
*/
protected void checkFileName() {
    // A file name was supplied: nothing to report
    if (fileName != null) {
        return;
    }

    // fileName was never set: tell the user and record the failure in errorFlag
    System.out.println("INPUT ERROR: Must specify file name (-F)");
    errorFlag = false;
}

/* ---------------------------------------------------------------- */
/*                                                                  */
/*                     READ INPUT DATA FROM FILE                    */
/*                                                                  */
/* ---------------------------------------------------------------- */

/* READ FILE */

/** Reads input data from file specified in command line argument (GUI version also exists).

Proceeds as follows:

  1. Gets the number of lines in the file, checking the format of each line (space-separated integers); if an incorrectly formatted line is found, inputFormatOkFlag is set to false.
  2. Dimensions the input array.
  3. Reads the data.
*/ protected void readFile() { try { // Dimension data structure inputFormatOkFlag=true; numRows = getNumberOfLines(fileName); if (inputFormatOkFlag) { dataArray = new short[numRows][]; // Read file System.out.println("Reading input file: " + fileName); readInputDataSet(); } else System.out.println("Error reading file: " + fileName + "\n"); } catch(IOException ioException) { System.out.println("Error reading File"); closeFile(); System.exit(1); } } /* GET NUMBER OF LINES */ /** Gets number of lines/records in input file and checks format of each line. @param nameOfFile the filename of the file to be opened. @return the number pf rows in the given file. */ protected int getNumberOfLines(String nameOfFile) throws IOException { int counter = 0; // Open the file if (filePath==null) openFileName(nameOfFile); else openFilePath(); // Loop through file incrementing counter // get first row. String line = fileInput.readLine(); while (line != null) { checkLine(counter+1,line); StringTokenizer dataLine = new StringTokenizer(line); int numberOfTokens = dataLine.countTokens(); if (numberOfTokens == 0) break; counter++; line = fileInput.readLine(); } // Close file and return closeFile(); return(counter); } /* CHECK LINE */ /** Check whether given line from input file is of appropriate format (space separated integers), if incorrectly formatted line found inputFormatOkFlag set to false. @param counter the line number in the input file. @param str the current line from the input file. */ protected void checkLine(int counter, String str) { for (int index=0;index = itemSet[index+1]) { JOptionPane.showMessageDialog(null,"FILE FORMAT ERROR:\n" + "Attribute data in line " + lineNum + " not in numeric order"); return(false); } } // Default return return(true); } /* COUNT NUMBER OF COLUMNS */ /** Counts number of columns represented by input data. 
*/ protected void countNumCols() { int maxAttribute=0; // Loop through data array for(int index=0;index maxAttribute) maxAttribute = dataArray[index][lastIndex]; } numCols = maxAttribute; numOneItemSets = numCols; // default value only } /* OPEN FILE NAME */ /** Opens file using fileName (instance field). @param nameOfFile the filename of the file to be opened. */ protected void openFileName(String nameOfFile) { try { // Open file FileReader file = new FileReader(nameOfFile); fileInput = new BufferedReader(file); } catch(IOException ioException) { JOptionPane.showMessageDialog(this,"Error Opening File", "Error: ",JOptionPane.ERROR_MESSAGE); } } /* OPEN FILE PATH */ /** Opens file using filePath (instance field). */ private void openFilePath() { try { // Open file FileReader file = new FileReader(filePath); fileInput = new BufferedReader(file); } catch(IOException ioException) { JOptionPane.showMessageDialog(this,"Error Opening File", "Error: ",JOptionPane.ERROR_MESSAGE); } } /* CLOSE FILE */ /** Close file fileName (instance field). */ protected void closeFile() { if (fileInput != null) { try { fileInput.close(); } catch (IOException ioException) { JOptionPane.showMessageDialog(this,"Error Closing File", "Error: ",JOptionPane.ERROR_MESSAGE); } } } /* BINARY CONVERSION. */ /** Produce an item set (array of elements) from input line. 
@param dataLine row from the input data file
@param numberOfTokens number of items in row
@return 1-D array of short integers representing attributes in input row
(null when numberOfTokens is zero or negative, because no element is ever added)
@throws NumberFormatException if a token cannot be parsed as a short */
protected short[] binConversion(StringTokenizer dataLine, int numberOfTokens) {
    short[] newItemSet = null;

    // Convert the tokens one at a time, in input order
    for (int tokenCounter = 0; tokenCounter < numberOfTokens; tokenCounter++) {
        // parseShort gives the primitive directly; the Short(String)
        // constructor used previously is deprecated and only boxed the same value
        short number = Short.parseShort(dataLine.nextToken());
        // realloc1 is defined elsewhere; presumably it returns the item set
        // extended by number -- verify against its definition
        newItemSet = realloc1(newItemSet, number);
    }

    return newItemSet;
}

/* ----------------------------------------------- */
/*                                                 */
/*        ITEM SET INSERT AND ADD METHODS          */
/*                                                 */
/* ----------------------------------------------- */

/* REALLOC INSERT */ /** Resizes given item set so that its length is increased by one and new element inserted. @param oldItemSet the original item set @param newElement the new element/attribute to be inserted @return the combined item set */ protected short[] reallocInsert(short[] oldItemSet, short newElement) { // No old item set if (oldItemSet == null) { short[] newItemSet = {newElement}; return(newItemSet); } // Otherwise create new item set with length one greater than old // item set int oldItemSetLength = oldItemSet.length; short[] newItemSet = new short[oldItemSetLength+1]; // Loop int index1; for (index1=0;index1 < oldItemSetLength;index1++) { if (newElement < oldItemSet[index1]) { newItemSet[index1] = newElement; // Add rest for(int index2 = index1+1;index2