/* -------------------------------------------------------------------------- */ /* */ /* RULE LIST */ /* */ /* Frans Coenen */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* Tuesday 2 March 2004, Revised 11 Januray 2005 */ /* */ /* Bug reports: Thanks to Jian (Ella) Chen, Dept Comp. Sci. School of */ /* Information Sci. and Tech. Sun Yat-sen University, China. */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure AssocRuleMining | +-- RuleList */ // Java packages import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; /** Set of utilities to support various Association Rule Mining (ARM) algorithms included in the LUCS-KDD suite of ARM programs. @author Frans Coenen @version 2 March 2004 */ public class RuleList extends AssocRuleMining { /* ------ FIELDS ------ */ // --- Data structures --- /** Rule node in linked list of rules (either ARs or CRs). */ protected class RuleNode { /** Antecedent of AR. */ protected short[] antecedent; /** Consequent of AR. */ protected short[] consequent; /** The Laplace accuracy associate with the rule represented by this node. */ double laplaceAccuracy=0.0; /** Link to next node */ RuleNode next = null; /** Three argument constructor @param antecedent the antecedent (LHS) of the AR. @param consequent the consequent (RHS) of the AR. @param accuracy the associated Laplace accuracy value. */ private RuleNode(short[] ante, short[]cons, double accuracy) { antecedent = ante; consequent = cons; laplaceAccuracy = accuracy; } } /** The reference to start of the rule list. */ protected RuleNode startRulelist = null; /* ------ CONSTRUCTORS ------ */ /** Default constructor to create an instance of the class RuleList */ public RuleList() { } /* ------ METHODS ------ */ /* -------------------------------------------------------------- */ /* */ /* RULE LINKED LIST ORDERED ACCORDING TO LAPLACE ACCURACY */ /* */ /* -------------------------------------------------------------- */ /* Methods for inserting rules into a linked list of rules ordered according to laplace accuracy (most accurate rule first). Each rule described in terms of 3 fields: 1) Antecedent (an item set), 2) a consequent (an item set), 3) a Laplace accuracy value (double). */ /* INSERT (ASSOCIATION/CLASSIFICATION) RULE INTO RULE LINKED LIST (ORDERED ACCORDING LAPLACE ACCURACY). */ /** Inserts an (association/classification) rule into the linked list of rules pointed at by startRulelist.
The list is ordered so that rules with highest confidence are listed first. If two rules have the same confidence the new rule will be placed after the existing rule. Thus, if using an Apriori approach to generating rules, more general rules will appear first in the list with more specific rules (i.e. rules with a larger antecedent) appearing later as the more general rules will be generated first. @param antecedent the antecedent (LHS) of the rule. @param consequent the consequent (RHS) of the rule. @param laplaceAccuracy the associated Laplace accuracy value for the rule.*/ protected void insertRuleintoRulelist(short[] antecedent, short[] consequent, double laplaceAccuracy) { // Create new node RuleNode newNode = new RuleNode(antecedent,consequent, laplaceAccuracy); // Empty list situation if (startRulelist == null) { startRulelist = newNode; return; } // Add new node to start if (laplaceAccuracy > startRulelist.laplaceAccuracy) { newNode.next = startRulelist; startRulelist = newNode; return; } // Add new node to middle RuleNode markerNode = startRulelist; RuleNode linkRuleNode = startRulelist.next; while (linkRuleNode != null) { if (laplaceAccuracy > linkRuleNode.laplaceAccuracy) { markerNode.next = newNode; newNode.next = linkRuleNode; return; } markerNode = linkRuleNode; linkRuleNode = linkRuleNode.next; } // Add new node to end markerNode.next = newNode; } /* -------------------------------------------------------- */ /* */ /* LAPLACE EXPECTED ERROR ESTIMATE */ /* */ /* -------------------------------------------------------- */ /* CALCULATE LAPLACE ACCURACY */ /** Determines Laplace expected error estimates (accuracies).
Note: only appropriate for classification rule generators, used by FOIL, PRM and CPAR. Calculated as follows:
Nc = Number of records in training set that include all the attributes
for the given rule antecedent + consequent, i.e. the total support
for the rule.
Ntot = Number of records in training set that include all the
attributes in the rule's antecedent, i.e. support for antecedent.
accuracy = (Nc+1)/(Ntot+numberOfClasses)
@param antecedent the antecedent of the given rule.
@param consequent the consequent of the given rule.
@return the Laplace accuracy. */
protected double getLaplaceAccuracy(short[] antecedent, short consequent) {
int totalCounter=0; // Ntot
int classCounter=0; // Nc
// Get number of records in training set that satisfy rule antecedent
// only (Ntot), and the size of the sunset of the identified records that
// also include the rule consequent (Nc)
for(int index=0;index Note rules are stored
according to Laplace accuracy (most accurate rule first), this is the
measure to be maximised to identify "best" rules.
@param kValue the maximum number of rules to be kept per class.
@param classification the possible classes. */
private void keepBestKrulesPerClass(int kValue, short[] classification) {
// Loop through classification array
for (int index=0;index Used in Best K Average (CPAR)
algorithm.
@param linkref The reference to the start of the existing list of rules.
@param itemset the record to be classified. */
private void obtainallRulesForRecord(RuleNode linkRef, short[] itemSet) {
RuleNode newStartRef = null;
RuleNode markerRef = null;
// Loop through linked list of existing rules
while (linkRef!=null) {
// If rule satisfies record add to new rule list
if (isSubset(linkRef.antecedent,itemSet)) {
RuleNode newNode = new RuleNode(linkRef.antecedent,
linkRef.consequent,linkRef.laplaceAccuracy);
if (newStartRef==null) newStartRef=newNode;
else markerRef.next=newNode;
markerRef=newNode;
}
linkRef=linkRef.next;
}
// Set rule list
startRulelist = newStartRef;
}
/* ----------------------------------- */
/* */
/* GET METHODS */
/* */
/* ----------------------------------- */
/* GET NUMBER OF RULES */
/** Returns the number of generated rules (usually used in
conjunction with classification rule mining algorithms rather than ARM
algorithms).
@return the number of CRs. */
protected int getNumCRs() {
int number = 0;
RuleNode linkRuleNode = startRulelist;
// Loop through linked list
while (linkRuleNode != null) {
number++;
linkRuleNode = linkRuleNode.next;
}
// Return
return(number);
}
/* ----------------------------------- */
/* */
/* SET METHODS */
/* */
/* ----------------------------------- */
/* SET NUMBER OF CLASSES */
/** Sets number of rows field. */
protected void setNumClasses(int numC) {
numClasses=numC;
}
/* SET DATA ARRAT */
/** Set 2-D "short" data array reference. */
protected void setDataArray(short[][] dArray) {
dataArray=dArray;
}
/* ------------------------------ */
/* */
/* OUTPUT */
/* */
/* ------------------------------ */
/* OUTPUT RULE LINKED LIST */
/** Outputs contents of rule linked list (if any) */
public void outputRules() {
outputRules(startRulelist);
}
/** Outputs given rule list.
@param ruleList the given rule list. */
private void outputRules(RuleNode ruleList) {
System.out.println("CLASSIFIER\n----------\b");
// Check for empty rule list
if (ruleList==null) System.out.println("No rules generated!");
// Loop through rule list
int number = 1;
RuleNode linkRuleNode = ruleList;
while (linkRuleNode != null) {
System.out.print("(" + number + ") ");
outputRule(linkRuleNode);
System.out.println(" " +
twoDecPlaces(linkRuleNode.laplaceAccuracy) + "%");
number++;
linkRuleNode = linkRuleNode.next;
}
}
/** Outputs a rule.
@param rule the rule to be output. */
private void outputRule(RuleNode rule) {
outputItemSet(rule.antecedent);
System.out.print(" -> ");
outputItemSet(rule.consequent);
}
/* OUTPUT NUMBER OF RULES */
/** Outputs number of generated rules (ARs or CARS). */
public void outputNumRules() {
System.out.println("Number of rules = " + getNumCRs());
}
}