/* -------------------------------------------------------------------------- */ /* */ /* FOIL (FIRST ORDER INDUCTIVE LEARNER) CAR GENERATOR */ /* */ /* Frans Coenen */ /* */ /* Tuesday 3 February 2004 */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure AssocRuleMining | +-- Classification | +-- FOIL_CARgen */ // Java packages import java.util.*; import java.io.*; /** Methods to produce classification rules using FOIL (First Order Inductive Learner) algorithm first proposed by Ross Quinlan in 1993. Assumes that input dataset is organised such that classifiers are at the end of each record. Note: number of classifiers value is stored in the numClasses field in AssocRuleMining parent class. @author Frans Coenen @version 3 February 2004 */ /* To compile: javaARMpackc.exe FOIL_CARgen.java */ public class FOIL_CARgen extends Classification { /* ------ FIELDS ------ */ // Data structures /** 2-D array to hold positive examples */ private short[][] positiveExamples = null; /** 2-D array to hold negative examples */ private short[][] negativeExamples = null; /** 2-D array to temporaily hold positive examples */ private short[][] positiveExamples2 = null; /** 2-D array to temporaily hold negative examples */ private short[][] negativeExamples2 = null; // Constants /** The maximum number of attributes that can be contained in a rule antecedent. */ private final int MAX_NUM_ATTS_IN_ANTECEDENT=3; /* ------ CONSTRUCTORS ------ */ /** Constructor processes command line arguments. @param args the command line arguments (array of String instances). */ public FOIL_CARgen(String[] args) { super(args); } /* ------ METHODS ------ */ /* START CLASSIFICATION */ /** Starts classification rule generation process. @return The classification accuracay (%). */ public double startClassification() { // Calculate minimum support threshold in terms of number of // records in the training set. System.out.println("START FOIL CLASSIFICATION\n" + "-------------------------"); System.out.println("Max number of attributes per rule = " + MAX_NUM_ATTS_IN_ANTECEDENT); // Set rule list to null. Note that startRuleList is defined in the // AssocRuleMining parent class and is also used to store Association // Rules (ARS) with respect ARM. currentRlist.startRulelist = null; // Set DataArray and number of classes fields currentRlist.setDataArray(dataArray); currentRlist.setNumClasses(numClasses); // Check for classifier array if (classifiers==null) { System.out.println("ERROR: no classifiers array! To create a " + "classifiers array use createClassifiersArray(), " + "contained in ClassAprioriTserial class, called " + "from ther application class."); System.exit(1); } // Start FOIL generation process startFOIL(); // Test classification using the test set and return accuracy. return(twoDecPlaces(testClassification())); } /* START FOIL CLASSIFICATION *. /** Commences FOIL process to generate CARs.
Commence by, for each
class, generating the positive and negative examples. Also generate an
attribute array to store gain values. */
private void startFOIL() {
// Generate attribute array
attributes = new double[numOneItemSets-numClasses+1][2];
for(int attIndex=0;attIndex Note the generic rule
array also has field for support and confidence that are not used here.
@param antecedent the antecedent of the discovered rule.
@param consequent the consequent of the discovered rule. */
private void foundRule(short[] antecedent, short[] consequent) {
// It is possible, if we have no negative examples (N={}), to
// produce a rule with an empty antecedent --- such a rule should
// not be included in the rule list.
if (antecedent==null) return;
// Insert discovered rule into rule list according to Laplace accuracy
// estimate.
double laplace = currentRlist.getLaplaceAccuracy(antecedent,
consequent[0]);
currentRlist.insertRuleintoRulelist(antecedent,consequent,laplace);
// Revise positive examples
positiveExamples=removeExamplesThatDoSatRule(antecedent,
positiveExamples);
}
/* ------------------------------------------------------------ */
/* */
/* GAIN CALCULATIONS */
/* */
/* ------------------------------------------------------------ */
/* CALCULATE GAINS */
/** Loops through available attributes and determine gain for each if
attribute added to rule. Attribute is unavailable (i.e. has already been
used in rule) if first element for attribute set to '1.0'
@param antecedent the given antecedent.
@param oldPosCount number of previous positive examples.
@param oldNegCount number of previous negative examples. */
private void calculateGains(short[] antecedent, double oldPosCount,
double oldNegCount) {
// Loop through attribute list
for (int index=1;index