/* -------------------------------------------------------------------------- */ /* */ /* ASSOCIATION RULE DATA MINING */ /* */ /* Frans Coenen */ /* */ /* Wednesday 9 January 2003 */ /* (revised 21/1/2003, 14/2/2003, 2/5/2003, 2/7/2003, 3/2/2004, 8/5/2004, */ /* 1/2/2005, 3/2/2005, 14/2/06, 14/3/06, 18/6/06, 1/7/2006, 11/10/2006, */ /* 27/10/2006, 14/11/2006, 8/10/2008, 18/5/2010) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* To compile: javac AssocRuleMining.java */ // Java packages import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; /** Set of utillities to support various Association Rule Mining (ARM) algorithms included in the LUCS-KDD suite of ARM programs. @author Frans Coenen @version 11 October 2006 */ public class AssocRuleMining extends JFrame { /* ------ FIELDS ------ */ /** Constants. */ protected int MAX_NUM_RULES = 10000; // Data structures /** The reference to start of the rule list. */ protected RuleNode startRulelist = null; /** 2-D aray to hold input data from data file. Note that within the data array records are numbered from zero, thus record one has index 0 etc.
First index is row (record or TID) number starting from 0, and second is attribute (column) number starting from zero. */ protected short[][] dataArray = null; /** 2-D array used to renumber columns for input data in terms of frequency of single attributes (reordering will enhance performance for some ARM algorithms). */ protected int[][] conversionArray = null; /** 1-D array used to reconvert input data column numbers to their original numbering where the input data has been ordered to enhance computational efficiency. */ protected short[] reconversionArray = null; /** 1-D array to hold output schema. */ protected String[] outputSchema = null; // Constants /** Minimum support value */ protected static final double MIN_SUPPORT = 0.0; /** Maximum support value */ protected static final double MAX_SUPPORT = 100.0; /** Maximum confidence value */ protected static final double MIN_CONFIDENCE = 0.0; /** Maximum confidence value */ protected static final double MAX_CONFIDENCE = 100.0; // Command line arguments with default values and associated fields. /** Command line argument for data file name. */ protected String fileName = null; /** Command line argument for output file name. */ protected String outputFileName = null; /** Command line argument for file name for testset (used in classification where separate test and training set files may be used. */ protected String testSetFileName = null; /** Command line argument for number of columns (attributes) in input data. */ protected int numCols = 0; /** Command line argument for number of rows in input data. */ protected int numRows = 0; /** Command line argument for % support (default = 20%). */ protected double support = 20.0; // More fields /** Minimum support value in terms of number of rows.
Set when input
data is read and the number of records is known, reset if input data is
resized so that only N percent is used. */
protected double minSupport = 0;
/** Command line argument for % confidence (default = 80%). */
protected double confidence = 80.0;
/** The number of one itemsets (singletons). */
protected int numOneItemSets = 0;
/** The number of frequent sets identified during processing. */
protected int numFrequentSets = 0;
/** The number of rules (ARs, CARs or CRs) that have been generated. */
protected int numRules = 0;
/** Number of classes in input data set (input by the user). */
protected int numClasses = 0;
/** Number of rows in output schema. */
private int numRowsInOutputSchema = 0;
// Flags
/** Error flag used when checking command line arguments (default =
true). */
protected boolean errorFlag = true;
/** Input format OK flag (default = true). */
protected boolean inputFormatOkFlag = true;
/** Flag to indicate whether system has data or not (default = false). */
protected boolean haveDataFlag = false;
/** Flag to indicate whether input data has been sorted or not. */
protected boolean isOrderedFlag = false;
/** Flag to indicate whether input data has been sorted and pruned or
not. */
protected boolean isPrunedFlag = false;
/** Flag to indicate whether output schema is available or not. */
protected boolean hasOutputSchemaFlag = false;
/** Support confidence framework flag. */
protected boolean supConfFworkFlag = false;
/** Support lift framework flag. */
protected boolean supLiftFworkFlag = false;
/** Output rule set to file flag. */
protected boolean outputRuleSetToFileFlag = false;
// Other fields
/** The input stream, instance of class BufferedReader. */
protected BufferedReader fileInput;
/** The output stream, instance of class PrintWriter. */
private PrintWriter fileOutput;
/** The file path; when non-null the file reading methods open this path
in preference to a file name. */
protected File filePath = null;
/* ------ CONSTRUCTORS ------ */
/** Constructor with command line arguments to be processed.
@param args the command line arguments (array of String instances). */
public AssocRuleMining(String[] args) {
// Process command line arguments
for(int index=0;index Note that it is assumed
that no empty records are included. Proceeds as follows:
Similar to the getNumberOfLines method but without line checking.
@param fName the name of the output schema file.
@return the number of lines (attributes) in the output schema. */
protected int getNumLinesInOutputSchema(String fName) throws IOException {
    // Open the schema file: when the filePath field is set it is opened in
    // preference to the supplied file name.
    if (filePath != null) openFilePath();
    else openFileName(fName);

    // Count every line up to end-of-file; no format checking is done.
    int lineCount = 0;
    for (String row = fileInput.readLine(); row != null;
            row = fileInput.readLine()) {
        lineCount++;
    }

    // Close the file and hand back the total.
    closeFile();
    return lineCount;
}
/* READ OUTPUT SCHEMA */
/** Reads the output schema by delegating to readOutputSchema(String),
passing the fileName field as the file name argument.
NOTE(review): fileName is documented in this class as the data file name;
confirm that callers set it to the schema file before calling this method.
@throws IOException propagated from readOutputSchema(String). */
public void readOutputSchema() throws IOException {
readOutputSchema(fileName);
}
/* READ OUTPUT SCHEMA */
/** Reads the output schema from the given file, storing each line, in file
order, in successive elements of the outputSchema array. The array is not
allocated here, so it must already be dimensioned to hold every line in the
file.
@param fName the given file name (only used when the filePath field is
null).
@throws IOException if a line cannot be read from the file. */
protected void readOutputSchema(String fName) throws IOException {
    // Open the file (a set filePath is opened in preference to fName)
    if (filePath != null) openFilePath();
    else openFileName(fName);

    // Copy the file, line by line, into the output schema array
    int slot = 0;
    String row;
    while ((row = fileInput.readLine()) != null) {
        outputSchema[slot] = row;
        slot++;
    }

    // Close file
    closeFile();
}
/** Checks whether the number of attributes in the output schema is the
same as the number of attributes (numCols) in the input data. If the output
schema or the input data has not been loaded, or if the two counts differ,
an error dialog is displayed and false is returned.
@return true if the number of attributes are the same and false otherwise. */
public boolean checkSchemaVdata() {
    // Work out which, if any, of the three failure cases applies; the
    // checks are made in the order schema, data, lengths.
    String problem = null;
    if (outputSchema == null) {
        problem = "No output schema file.";
    }
    else if (dataArray == null) {
        problem = "No input data file.";
    }
    else if (outputSchema.length != numCols) {
        problem = "Number of attributes in " +
                "schema file (" + outputSchema.length + ") not\n" +
                "same as number of attributes in data file (" +
                numCols + ")\n";
    }

    // No problem found: schema and data agree
    if (problem == null) return true;

    // Otherwise report the problem to the user
    JOptionPane.showMessageDialog(null, problem,
            "CHECK SCHEMA v DATA ATTRIBUTES ERROR",
            JOptionPane.ERROR_MESSAGE);
    return false;
}
/* ---------------------------------------------------------------- */
/* */
/* READ INPUT DATA FROM FILE (GUI VERSIONS) */
/* */
/* ---------------------------------------------------------------- */
/* INPUT DATA SET */
/** Commences the process of getting input data (GUI version): records the
given file in the filePath field, reads it and, provided the input format
was found to be OK, checks the ordering and reports the number of records
and columns to the text area.
@param textArea the text area in the GUI used for output.
@param fName the input file to be read. */
public void inputDataSet(JTextArea textArea, File fName) {
    // Remember the file and read it
    filePath = fName;
    readFile(textArea);

    // Nothing more is done here if the input format was rejected
    if (!inputFormatOkFlag) return;

    // Ordering check failed: no usable data
    if (!checkOrdering()) {
        haveDataFlag = false;
        // Reset inputFormatOkFlag to its default ready for the next
        // input file
        inputFormatOkFlag = true;
        textArea.append("Error reading file: " + filePath + "\n\n");
        return;
    }

    // Ordering is fine: report the data set dimensions
    textArea.append("Number of records = " + numRows + "\n");
    countNumCols();
    textArea.append("Number of columns = " + numCols + "\n");
    haveDataFlag = true;
}
/* READ FILE */
/** Reads input data from file specified in command line argument.
Proceeds as follows:
Example, given the data set:
Used when ordering classification input
data where we wish classes to be listed last.
@param countArray The 2-D array returned by the countSingles
method.
@param endIndex the index of the Nth element. */
protected void orderFirstNofCountArray(int[][] countArray, int endIndex) {
    // Bubble sort of elements 1..endIndex (inclusive) into descending
    // order of the count held at second index 1. Element 0 and any
    // elements after endIndex are left untouched. Elements with equal
    // counts are never exchanged, so they keep their relative order.
    boolean swapped = true;
    while (swapped) {
        swapped = false;
        for (int pos = 1; pos < endIndex; pos++) {
            int[] current = countArray[pos];
            int[] next = countArray[pos + 1];
            if (current[1] < next[1]) {
                // Exchange the (attribute, count) values of the pair
                int heldAttribute = current[0];
                int heldCount = current[1];
                current[0] = next[0];
                current[1] = next[1];
                next[0] = heldAttribute;
                next[1] = heldCount;
                swapped = true;
            }
        }
    }
}
/* DEFINE CONVERSION ARRAYS: */
/** Defines conversion and reconversion arrays.
@param countArray The 2-D array sorted by the orderCountArray
method.*/
protected void defConvertArrays(int[][] countArray) {
// Dimension arrays
conversionArray = new int[numCols+1][2];
reconversionArray = new short[numCols+1];
// Assign values by processing the count array which has now been
// ordered.
for(int index=1;index The tree is ordered from
left to right so that rules with highest "ordering value" (this is assumed
to be a confidence value but could equaly well be some other value such as
lift) are listed in the left most branch. If two rules have the same
ordering value the new rule will be placed after the existing rule. Thus,
if using an Apriori approach to generating rules, more general rules will
appear first in the list with more specific rules (i.e. rules with a larger
antecedent) appearing later as the more general rules will be generated
first.
@param antecedent the antecedent (LHS) of the rule.
@param consequent the consequent (RHS) of the rule.
@param ordValue1 the primary ordering value (usually the confidence value).
@param ordValue2 the secondary ordering value (usually the support
value). */
protected void insertRuleIntoRulelist(short[] antecedent,
        short[] consequent, double ordValue1, double ordValue2) {
    // Ignore the rule once numRules has gone past the MAX_NUM_RULES limit
    if (numRules > MAX_NUM_RULES) return;

    // Non-empty tree: "walk" it from the root to find the insertion point
    if (startRulelist != null) {
        insertRuleIntoRulelist(startRulelist, antecedent, consequent,
                ordValue1, ordValue2);
        return;
    }

    // Empty tree: the new rule becomes the root
    startRulelist = new RuleNode(antecedent, consequent, ordValue1,
            ordValue2);
}
/** Inserts an (association/classification) rule into the binary tree of
rules pointed at by startRulelist. Version which uses only one ordering
value; the second ordering value is set to 0.0.
NOTE(review): unlike the four-argument version, this one makes no check
against MAX_NUM_RULES; confirm whether that is intended.
@param antecedent the antecedent (LHS) of the rule.
@param consequent the consequent (RHS) of the rule.
@param ordValue1 the primary ordering value (usually the confidence
value). */
protected void insertRuleIntoRulelist(short[] antecedent,
        short[] consequent, double ordValue1) {
    final double noSecondaryValue = 0.0;

    if (startRulelist != null) {
        // Tree already has a root: "walk" it to find the insertion point
        insertRuleIntoRulelist(startRulelist, antecedent, consequent,
                ordValue1, noSecondaryValue);
    }
    else {
        // Empty tree: the new rule becomes the root
        startRulelist = new RuleNode(antecedent, consequent, ordValue1,
                noSecondaryValue);
    }
}
/** Continues the process of adding a rule to the binary tree: descends from
the given node, using insertRuleSelector at each node visited to choose the
left or right branch, until an empty branch is found, and attaches the new
rule there.
@param node the location in the bin tree at which to start.
@param antecedent the antecedent (LHS) of the rule.
@param consequent the consequent (RHS) of the rule.
@param ordValue1 the primary ordering value (usually the confidence value).
@param ordValue2 the secondary ordering value (usually the support
value). */
private void insertRuleIntoRulelist(RuleNode node, short[] antecedent,
        short[] consequent, double ordValue1, double ordValue2) {
    RuleNode current = node;
    while (true) {
        if (insertRuleSelector(current, antecedent, ordValue1, ordValue2)) {
            // Left branch
            if (current.leftBranch == null) {
                current.leftBranch = new RuleNode(antecedent, consequent,
                        ordValue1, ordValue2);
                return;
            }
            current = current.leftBranch;
        }
        else {
            // Right branch
            if (current.rightBranch == null) {
                current.rightBranch = new RuleNode(antecedent, consequent,
                        ordValue1, ordValue2);
                return;
            }
            current = current.rightBranch;
        }
    }
}
/** Calculates the selector for deciding whether to add a rule to the left
or the right branch of the binary-tree rule storage structure. The left
branch is selected when the new rule has (i) a larger primary ordering value
than the node, or (ii) an equal primary value and a larger secondary value,
or (iii) equal primary and secondary values and a longer antecedent. In all
other cases (including a complete tie) the right branch is selected.
@param node the current location in the bin tree.
@param antecedent the antecedent (LHS) of the rule.
@param ordValue1 the primary ordering value (usually the confidence value).
@param ordValue2 the secondary ordering value (usually the support
value).
@return true if left branch and false otherwise. */
protected boolean insertRuleSelector(RuleNode node, short[] antecedent,
        double ordValue1, double ordValue2) {
    // Primary ("confidence") value decides unless the two are equal
    if (ordValue1 > node.confidenceForRule) return true;
    if (ordValue1 != node.confidenceForRule) return false;

    // Equal primary values: secondary ("support") value decides next
    if (ordValue2 > node.supportForRule) return true;

    // Last resort: equal secondary values and a larger antecedent
    return ordValue2 == node.supportForRule &&
            antecedent.length > node.antecedent.length;
}
/* -------------------------------------------------------------- */
/* */
/* NUMBER RULES */
/* */
/* -------------------------------------------------------------- */
/** Numbers and counts the rules contained in the binary tree, usually done
when the tree is complete; the resulting count is stored in numRules (0 for
an empty tree). (CMAR and CBA use a different rule storage structure.) */
protected void numberRulesInBinTree() {
    // Empty tree: no rules
    if (startRulelist == null) {
        numRules = 0;
        return;
    }

    // Numbering starts at 1; the recursive call returns the next unused
    // number, i.e. one more than the number of rules.
    final short firstRuleNumber = 1;
    numRules = numberRulesInBinTree(firstRuleNumber, startRulelist) - 1;
}
/** Continues the process of numbering the rules in the rule binary tree (if
any). Nodes are numbered in the order: left branch, node, right branch.
@param number the next rule number to be allocated.
@param linkRuleNode the current node (may be null).
@return the next unallocated rule number once this subtree is numbered. */
private short numberRulesInBinTree(short number, RuleNode linkRuleNode) {
    // Empty subtree: nothing to number
    if (linkRuleNode == null) return number;

    // Left branch first
    short nextNumber = numberRulesInBinTree(number, linkRuleNode.leftBranch);

    // Then this node
    linkRuleNode.ruleNumber = nextNumber;
    nextNumber++;

    // And finally the right branch
    return numberRulesInBinTree(nextNumber, linkRuleNode.rightBranch);
}
/* -------------------------------------------------------------- */
/* */
/* GET CONSEQUENT FOR RULE N */
/* */
/* -------------------------------------------------------------- */
/** Gets the consequent associated with a particular rule identified by its
rule number, searching the binary tree from startRulelist.
@param ruleNumber the identifying number of the desired rule consequent.
@return the associated consequent (as an itemset), or null if the tree is
empty. */
protected short[] getConsequentOfRuleN(int ruleNumber) {
    // Empty tree gives null, otherwise search from the root
    return (startRulelist == null)
            ? null
            : getConsequentOfRuleN(ruleNumber, startRulelist);
}
/** Continues process of returning consequent associated with a particular
rule identified by its rule number.
@param ruleNum the identifying number of the desired rule consequent.
@param linkRuleNode the currentNode.
@return the associated consequent (as an itemset). */
public short[] getConsequentOfRuleN(int ruleNum, RuleNode linkRuleNode) {
//System.out.println("getConsequentOfRuleN: ruleNum = " + ruleNum);
//System.out.println("linkRuleNode.ruleNumber = " + linkRuleNode.ruleNumber);
// Found rule
if (linkRuleNode.ruleNumber==ruleNum) return(linkRuleNode.consequent);
// Proceed down left branch?
if (ruleNum List is ordered so that
more specific rules (i.e. rules with most attributes in their antecedent)
are listed first. **** NOT CURRENTLY USED ****
@param antecedent the antecedent (LHS) of the rule.
@param consequent the consequent (RHS) of the rule.
@param confidenceForRule the associated confidence value. */
/* protected void insertRuleIntoRulelist2(short[] antecedent,
short[] consequent, double confidenceForRule) {
// Check for empty tree.
if (startRulelist == null) startRulelist = new RuleNode(antecedent,
consequent,confidenceForRule);
// Otherwise "walk" tree
else insertRuleIntoRulelist2(startRulelist,antecedent,consequent,
confidenceForRule);
} */
/** Continues process of adding rule to binary tree according to size of
antecedent
@param currentNode the current location in the bin tree.
@param antecedent the antecedent (LHS) of the rule.
@param consequent the consequent (RHS) of the rule.
@param orderingValue the associated support value. */
/* private void insertRuleIntoRulelist2(RuleNode currentNode,
short[] antecedent, short[] consequent, double orderingValue) {
// Left branch
if (antecedent.length>currentNode.antecedent.length) {
if (currentNode.leftBranch==null) currentNode.leftBranch = new
RuleNode(antecedent,consequent,orderingValue);
else insertRuleIntoRulelist2(currentNode.leftBranch,antecedent,
consequent,orderingValue);
}
// Right branch
else {
if (currentNode.rightBranch==null) currentNode.rightBranch = new
RuleNode(antecedent,consequent,orderingValue);
else insertRuleIntoRulelist2(currentNode.rightBranch,antecedent,
consequent,orderingValue);
}
} */
/* ----------------------------------------------- */
/* */
/* ITEM SET INSERT AND ADD METHODS */
/* */
/* ----------------------------------------------- */
/* APPEND */
/** Concatenates two itemSets --- resizes given array so that its
length is increased by size of second array and second array added.
@param itemSet1 The first item set.
@param itemSet2 The item set to be appended.
@return the combined item set */
protected short[] append(short[] itemSet1, short[] itemSet2) {
// Test for empty sets, if found return other
if (itemSet1 == null) return(copyItemSet(itemSet2));
else if (itemSet2 == null) return(copyItemSet(itemSet1));
// Create new array
short[] newItemSet = new short[itemSet1.length+itemSet2.length];
// Loop through itemSet 1
int index1;
for(index1=0;index1 Note that
given itemSets may not be disjoint.
@param itemSet1 The first given item set.
@param itemSet2 the second given item set.
@return the union of the two itemSets. */
protected short[] union(short[] itemSet1, short[] itemSet2) {
// check for null sets
if (itemSet1 == null) {
if (itemSet2 == null) return(null);
else return(itemSet2);
}
if (itemSet2 == null) return(itemSet1);
// determine size of union and dimension return itemSet
short[] newItemSet = new short[sizeOfUnion(itemSet1,itemSet2)];
// Loop through itemSets
int index1=0, index2=0, index3=0;
while (index1
For example given the item set [1,2,3] this will result in the
combinations[[1],[2],[3],[1,2],[1,3],[2,3],[1,2,3]].
@param inputSet the given item set.
@return array of arrays representing all possible combinations (may be null
if no combinations). */
protected short[][] combinations(short[] inputSet) {
    // No input set, no combinations
    if (inputSet == null) return null;

    // Dimension the output for the number of combinations, then fill it
    // recursively starting at input index 0 with nothing collected so far.
    final int total = getCombinations(inputSet);
    final short[][] allCombinations = new short[total][];
    combinations(inputSet, 0, null, allCombinations, 0);
    return allCombinations;
}
/** Recursively calculates all possible combinations of a given item set.
For each element from the current input index onwards, the element is added
(using realloc1) to the combination built so far, the result is stored at
the next free location in the output set, and the method recurses on the
remaining elements with a copy of that result as the new "so far" set.
@param inputSet the given item set.
@param inputIndex the index within the input set marking current
element under consideration (0 at start).
@param sofar the part of a combination determined so far during the
recursion (null at start).
@param outputSet the combinations collected so far, will hold all
combinations when recursion ends.
@param outputIndex the current location in the output set.
@return revised output index. */
private int combinations(short[] inputSet, int inputIndex,
        short[] sofar, short[][] outputSet, int outputIndex) {
    int nextFree = outputIndex;

    // Loop through the remainder of the input array
    for (int pos = inputIndex; pos < inputSet.length; pos++) {
        short[] extended = realloc1(sofar, inputSet[pos]);
        outputSet[nextFree] = extended;
        nextFree = combinations(inputSet, pos + 1, copyItemSet(extended),
                outputSet, nextFree + 1);
    }

    return nextFree;
}
/* GET COMBINATIONS */
/** Gets the number of possible (non-empty) combinations of a given item
set, calculated as 2 to the power of the set size, less 1.
@param set the given item set.
@return number of possible combinations. */
private int getCombinations(short[] set) {
    // The power is narrowed to int before the 1 is subtracted
    final double numSubsets = Math.pow(2.0, set.length);
    return ((int) numSubsets) - 1;
}
/* ---------------------------------------------------------------- */
/* */
/* MISCELANEOUS */
/* */
/* ---------------------------------------------------------------- */
/* COPY DATA ARRAY */
/** Makes a copy of the input data set held in the dataArray field by
passing it to copyItemSet.
@return the result of copyItemSet for dataArray (presumably a copy of the
2-D data array; the overload used is not visible here, so confirm whether
the copy is deep or shallow). */
protected short[][] copyDataArray() {
return(copyItemSet(dataArray));
}
/* COPY ITEM SET */
/** Makes a copy of a given itemSet.
@param itemSet the given item set.
@return copy of given item set. */
protected short[] copyItemSet(short[] itemSet) {
// Check whether there is a itemSet to copy
if (itemSet == null) return(null);
// Do copy and return
short[] newItemSet = new short[itemSet.length];
for(int index=0;index If field numRules is temporarily altered this
method can be used to output a particular rule as the default.
@param linkRuleNode the currentNode. */
public void outputRulesWithDefault(RuleNode linkRuleNode) {
    // Nothing to output for an empty (sub)tree
    if (linkRuleNode == null) return;

    // A node numbered beyond numRules is not output, and neither is
    // anything below it
    if (linkRuleNode.ruleNumber > numRules) return;

    // Left branch
    outputRulesWithDefault(linkRuleNode.leftBranch);

    // Node: the rule whose number equals numRules is shown with
    // "Default" in place of its antecedent
    System.out.print("(" + linkRuleNode.ruleNumber + ") ");
    if (linkRuleNode.ruleNumber != numRules) {
        outputItemSet(linkRuleNode.antecedent);
    }
    else {
        System.out.print("Default");
    }
    System.out.print(" -> ");
    outputItemSet(linkRuleNode.consequent);
    System.out.println(" " +
            twoDecPlaces(linkRuleNode.confidenceForRule));

    // Right branch
    outputRulesWithDefault(linkRuleNode.rightBranch);
}
/* ----------------- */
/* OUTPUT STATISTICS */
/* ----------------- */
/* OUTPUT NUMBER OF RULES */
/** Outputs the number of generated rules (ARs or CARs), as currently held
in the numRules field, to standard output. The field is printed as is; it
is not recalculated here. */
public void outputNumRules() {
System.out.println("Number of rules = " + numRules);
}
/* OUTPUT CLASSIFIER STATISTICS */
/** Output statistic for the generated rule set. */
public void outputClassifierStats() {
System.out.println("Number of rules = " + numRules);
// Get maximum size of antecedent
int maxSizeOfAntecedent = getMaxSizeOfAntecedent();
System.out.println("Max size of antecedent = " + maxSizeOfAntecedent);
// Calculate distribution of antecedent sizes
int[] antecedentDistrib = new int[maxSizeOfAntecedent];
calcAntecedentDistrib(antecedentDistrib);
// Calculate average antecedent size.
double aveSizeOfAntecedent = getAveSizeOfAntecedent(antecedentDistrib);
System.out.println("Ave size of antecedent = " + aveSizeOfAntecedent);
// Output distribution
System.out.println("Antecedent size distribution");
for (int index=0;index WARNING will overwrite
existing data if stored in the same directory as the application
executable, data files are better stored in a separate "DataFiles"
directory. */
public void outputDataArrayToFile() throws IOException{
//Determin file name
int fileNameIndex = fileName.lastIndexOf('/');
String shortFileName = fileName.substring(fileNameIndex+1,
fileName.length());
// Open file for writing
PrintWriter outputFile = new PrintWriter(new FileWriter(shortFileName));
// Write contents of Data array to file
for (int rowIndex = 0;rowIndex
*/
protected void readFile() {
    try {
        // Count (and format check) the lines in the file
        inputFormatOkFlag = true;
        numRows = getNumberOfLines(fileName);

        // Badly formatted input: tell the user and read nothing
        if (!inputFormatOkFlag) {
            JOptionPane.showMessageDialog(null,"Error reading file: " +
                    fileName + "\n","FILE INPUT ERROR",
                    JOptionPane.ERROR_MESSAGE);
            return;
        }

        // Dimension the data structure and read the file
        dataArray = new short[numRows][];
        System.out.println("Reading input file: " + fileName);
        readInputDataSet();
    }
    catch (IOException ioException) {
        // An I/O failure is reported, the file closed and the program
        // terminated with exit status 1
        JOptionPane.showMessageDialog(null,"Unknown error reading " +
                "file: " + fileName + "\n","FILE INPUT ERROR",
                JOptionPane.ERROR_MESSAGE);
        closeFile();
        System.exit(1);
    }
}
/* GET NUMBER OF LINES */
/** Gets the number of lines/records in the input file, passing each line to
checkLine (with a line number starting from 1) as it goes. Counting stops at
the end of the file or at the first line containing no tokens, whichever
comes first; such an empty line is still passed to checkLine but is not
counted.
@param nameOfFile the filename of the file to be opened (only used when
the filePath field is null).
@return the number of rows in the given file.
@throws IOException if a line cannot be read from the file. */
protected int getNumberOfLines(String nameOfFile) throws IOException {
    // Open the file (a set filePath is opened in preference to the name)
    if (filePath != null) openFilePath();
    else openFileName(nameOfFile);

    // Loop through the file incrementing the line count
    int lineCount = 0;
    for (String row = fileInput.readLine(); row != null;
            row = fileInput.readLine()) {
        checkLine(lineCount + 1, row);
        // A line with no tokens ends the count
        if (new StringTokenizer(row).countTokens() == 0) break;
        lineCount++;
    }

    // Close file and return
    closeFile();
    return lineCount;
}
/* CHECK LINE */
/** Check whether given line from input file is of appropriate format
(space separated integers), if incorrectly formatted line found
inputFormatOkFlag set to false.
@param counter the line number in the input file.
@param str the current line from the input file. */
protected void checkLine(int counter, String str) {
for (int index=0;index
@param textArea the text area in the gui used for output. */
public void readFile(JTextArea textArea) {
    try {
        // Count (and format check) the lines in the file
        inputFormatOkFlag = true;
        numRows = getNumberOfLines(fileName);

        // Badly formatted input: report to the text area, no data held
        if (!inputFormatOkFlag) {
            haveDataFlag = false;
            textArea.append("Error reading file:\n" + filePath + "\n\n");
            return;
        }

        // Dimension the data structure and read the file
        dataArray = new short[numRows][];
        textArea.append("Reading input file:\n" + filePath + "\n");
        readInputDataSet();

        // Data is available only once the read has completed
        haveDataFlag = true;
    }
    catch (IOException ioException) {
        // Unlike the non-GUI version, an I/O failure is reported and
        // flagged but the program carries on
        JOptionPane.showMessageDialog(null,"Error reading File",
                "FILE INPUT ERROR",JOptionPane.ERROR_MESSAGE);
        textArea.append("Error reading File\n");
        closeFile();
        haveDataFlag = false;
    }
}
/* ---------------------------------------------------------------- */
/* */
/* REORDER DATA SET ACCORDING TO ATTRIBUTE FREQUENCY */
/* */
/* ---------------------------------------------------------------- */
/* REORDER INPUT DATA: */
/** Reorders input data according to frequency of single attributes.
1 2 5
1 2 3
2 4 5
1 2 5
2 3 5
This would produce a countArray (ignore index 0 because there is no
attribute number 0):
+---+---+---+---+---+---+
| | 1 | 2 | 3 | 4 | 5 |
+---+---+---+---+---+---+
| | 3 | 5 | 2 | 1 | 4 |
+---+---+---+---+---+---+
Which sorts to:
+---+---+---+---+---+---+
| | 2 | 5 | 1 | 3 | 4 |
+---+---+---+---+---+---+
| | 5 | 4 | 3 | 2 | 1 |
+---+---+---+---+---+---+
Giving rise to the conversion Array of the form (no index 0):
+---+---+---+---+---+---+
| | 3 | 1 | 4 | 5 | 2 |
+---+---+---+---+---+---+
| | 3 | 5 | 2 | 1 | 4 |
+---+---+---+---+---+---+
Note that the first row gives the new attribute number (old attribute
number is the index). The second row here are the counts used to identify
the ordering but which now no longer play a role in the conversion
exercise. Thus the new column (attribute) number for column/attribute 1 is
column 3 (i.e. the first value at index 1). The reconversion array will be
of the form (values are the indexes from the conversion array while indexes
represent the first value from the conversion array):
+---+---+---+---+---+---+
| | 2 | 5 | 1 | 3 | 4 |
+---+---+---+---+---+---+
For example to convert the attribute number 3 back to its original number
we look up the value at index 3.
*/
public void idInputDataOrdering() {
// Count singles and store in countArray (one [attribute, count] entry
// per column, as returned by countSingles)
int[][] countArray = countSingles();
// Order the count array via orderCountArray (not visible here;
// presumably a bubble sort on the support value at the second index)
orderCountArray(countArray);
// Define conversion and reconversion arrays from the ordered counts;
// this must follow the ordering step as it relies on the sorted array
defConvertArrays(countArray);
// Set sorted flag. Note that only the ordering has been identified at
// this point: the data array itself is not touched by this method.
isOrderedFlag = true;
}
/* COUNT SINGLES */
/** Counts number of occurrences of each single attribute in the
input data.
@return 2-D array where first row represents column numbers
and second row represents support counts. */
protected int[][] countSingles() {
// Dimension and initialize count array
int[][] countArray = new int[numCols+1][2];
for (int index=0;index