/* -------------------------------------------------------------------------- */ /* */ /* PRM (PREDICTIVE RULE MINING) CAR GENERATOR */ /* */ /* Frans Coenen */ /* */ /* Tuesday 3 February 2004 */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure AssocRuleMining | +-- Classification | +-- PRM_CARgen */ // Java packages import java.util.*; import java.io.*; /** Methods to produce classification rules using PRM (Predictive Rule Mining) algorithm first proposed by Xiaoxin Yin and Jiawei Han. Assumes that input dataset is organised such that classifiers are at the end of each record. Note: number of classifiers value is stored in the numClasses field. @author Frans Coenen @version 3 February 2004 */ /* To compile: javaARMpackc.exe PRM_CARgen.java */ public class PRM_CARgen extends Classification { /* ------ FIELDS ------ */ // Nested classes /** Structure to store an example (positive or negative) records with weighting. */ protected class ExamplesStruct { /** Item set (example record). */ short[] itemSet; /** Weighting for the itemset. */ double weighting=1.0; /** One argument constructor @param record the itemset representing a record. */ protected ExamplesStruct(short[] record) { itemSet = copyItemSet(record); } /** Two argument constructor @param record the itemset representing a record. @param weight the weighting for the example record. */ protected ExamplesStruct(short[] record, double weight) { itemSet = copyItemSet(record); weighting = weight; } } // Data structures /** 2-D array to hold positive examples */ protected ExamplesStruct[] positiveExamples = null; /** 2-D array to hold negative examples */ protected ExamplesStruct[] negativeExamples = null; /** 2-D array to temporaily hold positive examples, what Xiaoxin Yin and Jiawei Han refer to as P'. */ protected ExamplesStruct[] positiveExamples2 = null; /** 2-D array to temporaily hold negative examples, what Xiaoxin Yin and Jiawei Han refer to as N'. */ protected ExamplesStruct[] negativeExamples2 = null; /** 2-D PNarray for storing size of current negative and positive example sets, what Xiaoxin Yin and Jiawei Han refer to as A. */ protected double[][] pn_array = null; /** 2-D temporary PN array for storing size of current negative and positive example sets, what Xiaoxin Yin and Jiawei Han refer to as A'. */ protected double[][] pn_array2 = null; // Constants /** Minimum total weight threshold */ protected final double TOTAL_WEIGHT_FACTOR=0.05; /** Weighting decrement */ protected final double DECAY_FACROR=1.0/3.0; /* ------ CONSTRUCTORS ------ */ /** Constructor processes command line arguments. @param args the command line arguments (array of String instances). */ public PRM_CARgen(String[] args) { super(args); } /* ------ METHODS ------ */ /* START CLASSIFICATION */ /** Starts classification rule generation process. @return The classification accuracy (%). */ public double startClassification() { System.out.println("START PRM CLASSIFICATION\n" + "------------------------"); // Set rule list to null. Note that startRuleList is defined in the // AssocRuleMining parent class and is also used to store Association // Rules (ARS) with respect to ARM. currentRlist.startRulelist = null; // Set DataArray and number of classes fields currentRlist.setDataArray(dataArray); currentRlist.setNumClasses(numClasses); // Check for classifier array if (classifiers==null) { System.out.println("ERROR: no classifiers array! To create a " + "classifiers array use createClassifiersArray(), " + "contained in ClassAprioriTserial class, called " + "from the application class."); System.exit(1); } // Start PRM generation process startPRM(); // Process rules processRules(); // Test classification using the test set and return accuracy. return(twoDecPlaces(testClassification())); } /* START PRM CLASSIFICATION */ /** Commences PRM process to generate CARs.
Proceeds as follows for
each class:
1) Generate attribute array (for computational convenience only).
2) Generate P and N example data sets.
2) Calculate weighting threshold.
3) While current weighting for P is greater than threshold:
a) Copy P, N and A to: get P', N' and A'.
b) If first iteration copy PN to PN', else combine PN' positive weightings
from previous iteration and original negative weightings to form
new PN array.
c) If not possible to generate further antecedent attributes break.
d) Generate rule. */
private void startPRM() {
// Generate attribute array
attributes = new double[numOneItemSets-numClasses+1][2];
for(int attIndex=0;attIndex Note the generic rule array also has field for support
and confidence that are not used here.
@param antecedent the antecedent of the discovered rule.
@param consequent the consequent of the discovered rule. */
protected void foundRule(short[] antecedent, short[] consequent) {
// It is possible, if we have no negative examples (N={}), to
// produce a rule with an empty antecedent --- such a rule should
// not be included in the rule list.
if (antecedent==null) return;
// Insert discovered rule into rule list
double laplace = currentRlist.getLaplaceAccuracy(antecedent,
consequent[0]);
currentRlist.insertRuleintoRulelist(antecedent,consequent,laplace);
// Revise positive examples
revisePosExDoSatRule(antecedent,positiveExamples);
}
/* ------------------------------------------------------------ */
/* */
/* GAIN CALCULATIONS */
/* */
/* ------------------------------------------------------------ */
/* CALCULATE GAINS */
/** Loops through available attributes and determine gain for each if
attribute added to rule. Attribute is unavailable (i.e. has already been
used in rule) if first element for attribute set to '1.0'
@param oldPosWeigth total weighting represented by previous positive
examples.
@param oldNegWeigth total weighting represented by previous negative
examples. */
protected void calculateGains(double oldPosWeigth, double oldNegWeigth) {
// Loop through attribute list
for (int index=1;index
@param flag 0=positive examples, 1=negative examples.
@param antecedent the given antecedent for the rule.
@param examples the given examples data set.
@return revise example data set with records that do not satisfy rule
removed. */
protected ExamplesStruct[] removeExDoNotSatRule(int flag,
short[] antecedent, ExamplesStruct[] examples) {
// Return null if input array is empty.
if (examples==null) return(null);
// Dimension new array
int size = getNumExDoSatRule(antecedent,examples);
// If new array is to be empty all weightings in the copy of the PN
// array associated with this example structure array must be 0.0
if (size==0) {
for (int index=1;index