/* -------------------------------------------------------------------------- */ /* */ /* APRIORI-TFP CBA (CLASSIFICATION BASED ON ASSOCIATIONS) */ /* */ /* Frans Coenen */ /* */ /* Friday 12 March 2004 */ /* (Bug fixes and maintenance: 10/2/2005, 12/10/2006, 5/3/2012) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure AssocRuleMining | +-- TotalSupportTree | +-- PartialSupportTree | +--AprioriTFPclass | +-- AprioriTFP_CARgen | +-- AprioriTFP_CBA */ // Java packages import java.util.*; import java.io.*; // Java GUI packages import javax.swing.*; /** Methods to produce classification rules using Wenmin Li, Jiawei Han and Jian Pei's CBA (Classification based on Multiple associate Rules) algorithm but founded on Apriori-TFP. Assumes that input dataset is orgnised such that classifiers are at the end of each record. Note: number of classifiers value is stored in the numClasses field. @author Frans Coenen @version 12 October 2006 */ public class AprioriTFP_CBA extends AprioriTFP_CARgen { /* ------ FIELDS ------ */ // --- Data structures --- /** Structure to store rules that the current ruke overrrides, ie rules that have lower precedence than the given rule.
Used where the current
rule wrongly classifies a record and the alternative lower precedence rule
(overriden by the current rule) correctly classifies the same record.*/
private class Overrides {
/** A potential cRule that may Overrides an alternative, but lower
precedence, cRule. */
RuleNodeCBA cRule;
/** The record TID in the training set. */
int tid;
/** The associated class label. */
short classLabel;
/** Link to next node. */
Overrides next = null;
/** Three argument constructor.
@param cr the given crule the correctly classifies at least one record.
@param tidNumber the TID number foe arecord in the training set.
@param cl the class label for the given record. */
private Overrides(RuleNodeCBA cr, int tidNumber, short cl) {
cRule = cr;
tid = tidNumber;
classLabel = cl;
}
}
/** Rule node in linked list of rules for CBA Classification rules. */
private class RuleNodeCBA {
/** The Antecedent of the CAR. */
private short[] antecedent;
/** The Consequent of the CAR. */
private short[] consequent;
/** List of possible alternative rules that can be used to replace the
current rules. */
Overrides replaceList = null;
/** The confidence value associate with the rule represented by this
node. */
private double confidenceForRule=0.0;
/** The support value associate with the rule represented by this
node. */
private double supportForRule=0.0;
/** The (local) dustribution array for the rule describing the number
of records covered/satisfied per class (taking into consideration
records already covered by previous rules with higher precedence). */
private int[] classCasesCovered = new int[numClasses];
/** A flag to indicate whether the rule is a cRule for at least one
record. */
private boolean isAcRule = false;
/** A flag to indicate whether the rule Is a strong cRule for at
least one record, i.e. is has greater orecedence than the associated
wRule. */
private boolean isAstrongCrule = false;
/** The default class associated with this rule. */
private short defaultClass = 0;
/** The total errors associated with this rule. */
private int totalErrors = 0;
/** Link to next node. */
private RuleNodeCBA next = null;
/** Four argument constructor
@param antecedent the antecedent (LHS) of the AR.
@param consequent the consequent (RHS) of the AR.
@param consvalue the associated confidence value.
@param suppValue the associated support value. */
private RuleNodeCBA(short[] ante, short[]cons, double confValue,
double suppValue) {
antecedent = ante;
consequent = cons;
confidenceForRule = confValue;
supportForRule = suppValue;
// Initialise class records array
for (int index=0;index Proceeds as follows: For each rule generated add to rule list if: (i)
Chi-Squared value is above a specified critical threshold (5% by default),
and (ii) the CR tree does not contain a more general rule with a higher
ordering. Rule added to rule list according to CBA ranking (ordering).
@param itemSetSofar the label for a T-treenode as generated sofar.
@param size the length/size of the current array lavel in the T-tree.
@param consequent the current consequent (classifier) for the CAR.
@param linkRef the reference to the current array lavel in the T-tree. */
protected void generateCARsRightLevel(short[] itemSetSofar, int size,
short[] consequent, TtreeNode[] linkRef) {
// Loop through T-tree array
for (int index=1; index < size; index++) {
// Check if node exists
if (linkRef[index] != null) {
// Generate Antecedent
short[] tempItemSet = realloc2(itemSetSofar,(short) index);
// Determine confidence
double confidenceForCAR = getConfidence(tempItemSet,
linkRef[index].support);
// Add CAR to linked list structure if confidence greater
// than minimum confidence threshold.
if (confidenceForCAR >= confidence) {
numCarsSoFar++;
double suppForConcequent = (double)
getSupportForItemSetInTtree(consequent);
insertRinRlistCBAranking(tempItemSet,consequent,
confidenceForCAR,linkRef[index].support);
}
}
}
}
/* ---------------------------------------------------------------- */
/* */
/* RULE LINKED LIST ORDERED ACCORDING TO CBA RANKING */
/* */
/* ---------------------------------------------------------------- */
/* Methods for inserting rules into a linked list of rules ordered
according to CBA ranking. Each rule described in terms of 4 fields: 1)
Antecedent (an item set), 2) a consequent (an item set), 3) a total support
value and 4) a confidence value (double). */
/* INSERT ASSOCIATION CLASSIFICATION) RULE INTO RULE LINKED LIST (ORDERED
ACCORDING TO CBA RANKING). */
/** Inserts an (association/classification) rule into the linkedlist of
rules pointed at by startCBArulelist. List is ordered according
to "CBA" ranking.
@param antecedent the antecedent (LHS) of the rule.
@param consequent the consequent (RHS) of the rule.
@param confidenceForRule the associated confidence value.
@param supportForRule the associated support value. */
protected void insertRinRlistCBAranking(short[] antecedent,
short[] consequent, double confidenceForRule,
double supportForRule) {
// Create new node
RuleNodeCBA newNode = new RuleNodeCBA(antecedent,consequent,
confidenceForRule,supportForRule);
// Empty list situation
if (startCBAruleList == null) {
startCBAruleList = newNode;
return;
}
// Add new node to start
if (ruleIsCBAgreater(newNode,startCBAruleList)) {
newNode.next = startCBAruleList;
startCBAruleList = newNode;
return;
}
// Add new node to middle
RuleNodeCBA markerNode = startCBAruleList;
RuleNodeCBA linkRuleNode = startCBAruleList.next;
while (linkRuleNode != null) {
if (ruleIsCBAgreater(newNode,linkRuleNode)) {
markerNode.next = newNode;
newNode.next = linkRuleNode;
return;
}
markerNode = linkRuleNode;
linkRuleNode = linkRuleNode.next;
}
// Add new node to end
markerNode.next = newNode;
}
/* RULE IS CBA GREATER */
/** Compares two rules and returns true if the first is "CBA greater" (has
a higher ranking) than the second. CBA ordering (same as CMAR) is as
follows:
Done in four stages: Procceds as follows, for each record in the training set:
*/
public void startClassification() {
String s = "START APRIORI-TFP CBA\n------------------------------\n" +
"\nMax number of CARS = " + MAX_NUM_CARS +
"\nMax size antecedent = " + MAX_SIZE_OF_ANTECEDENT + "\n";
if (textArea==null) System.out.println(s);
else textArea.append(s);
// Proceed
startClassification2();
}
/** Starts CBA classifier generation proces, GUI version.
@param tArea the text area to output data to. */
public void startClassification(JTextArea tArea) {
// Set text area
textArea = tArea;
// proceed
startClassification2();
}
/** Continues process of starting the CMAR classifier generation proces. */
protected void startClassification2() {
// Set data structure references to null
startSetAlist = null;
startCBAruleList = null;
// Generate all CARs using Apriori-TFP and place selected CARs into
// linked list of rules.
startCARgeneration2();
// Prune linked list of rules using CBA "cover" principal
pruneUsingCBAapproach(copyItemSet(dataArray));
// Output rule list (uses local method).
if (ruleListAttNumOutputFlag || ruleListSchemaOutputFlag)
outputCBArules();
// Test classification using the test set.
testClassification();
}
/*----------------------------------------------------------------------- */
/* */
/* APRIORI-TFP CBA WITH TEN CROSS VALIDATION (TCV) */
/* */
/*----------------------------------------------------------------------- */
/* COMMEMCE TEN CROSS VALIDATION WITH OUTPUT*/
/** Start CBA Ten Cross Validation (TCV) process with output of individual
accuracies. */
public void commenceTCVwithOutput() {
double[][] parameters = new double[10][5];
String s = "START TCV APRIORI-TFP CBA CLASSIFICATION\n" +
"------------------------------\nMax number of CARS = " +
MAX_NUM_CARS + "\nMax size antecedent = " +
MAX_SIZE_OF_ANTECEDENT + "\n";
if (textArea==null) System.out.println(s);
else textArea.append(s);
// Loop through tenths data sets
for (int index=0;index<10;index++) {
s = "[--- " + (index+1) + " ---] ";
if (textArea==null) System.out.println(s);
else textArea.append(s);
// Create training and test sets
createTrainingAndTestDataSets(index);
// Mine data, produce T-tree and generate CRs
startClassification2();
parameters[index][0] = accuracy;
parameters[index][1] = aucValue;
parameters[index][2] = numFrequentSets;
parameters[index][3] = numUpdates;
// Final rule set contained in the standrad list of rules not the
// CBA rule list defined in this class
parameters[index][4] = getNumRules();
}
// Output
tcvOutput(parameters);
}
/** Start CBA Ten Cross Validation (TCV) process with output of individual
accuracies, GUI version.
@param tArea the given instance of the JTextArea class. */
public void commenceTCVwithOutput(JTextArea tArea) {
textArea = tArea;
// Proceed
commenceTCVwithOutput();
}
/*----------------------------------------------------------------------- */
/* */
/* CLASSIFICATION ASSOCIATION RULE (CAR) GENERATION */
/* */
/*----------------------------------------------------------------------- */
/* GENERATE CLASSIFICATION ASSOCIATION RULES (RIGHT LEVEL). */
/** Generating classificationh association rules from a given array of
T-tree nodes.
@param rule1 the given rule to be compared to the second.
@param rule2 the rule which the given rule1 is to be compared to.
@return true id rule1 is greater then rule2, and false otherwise. */
private boolean ruleIsCBAgreater(RuleNodeCBA rule1, RuleNodeCBA rule2) {
// Compare confidences
if (rule1.confidenceForRule > rule2.confidenceForRule) return(true);
// If confidences are the same compare support values
if (similar2dec(rule1.confidenceForRule,rule2.confidenceForRule)) {
if (rule1.supportForRule > rule2.supportForRule) return(true);
// If confidences and supports are the same compare antecedents
if (similar2dec(rule1.supportForRule,rule2.supportForRule)) {
if (rule1.antecedent.length < rule2.antecedent.length)
return(true);
}
}
// Otherwise return false
return(false);
}
/* ---------------------------------------------------------------- */
/* */
/* PRUNE CBA RULES */
/* */
/* ---------------------------------------------------------------- */
/* PRUNE CBA RULES */
/** Commences process of generating local distribution arrays and producing
rthe final classifier.
*/
protected void pruneUsingCBAapproach(short[][] trainingSet) {
// Check for empty rule list
if (startCBAruleList==null) {
System.out.println("NO RULES GENERATED");
return;
}
// (1) Find C and W rules
findCandWrules(trainingSet);
// (2) Process wrongly classified records
procesSetAlinkedList(trainingSet);
// (3) Process rule list
processRuleList(trainingSet);
// (4) enerate classifier
generateClassifier();
}
/* -------------------------------------------------------- */
/* STAGE 1: C AND W RULE IDENTIFICATION */
/* -------------------------------------------------------- */
/* FIND C AND W RULES */
/** Processes training set and identifies appropriate cRules and wRules.
On completion: (i) all records which are wrongly classified (but for which
there is a corresponding cRule) will be collated into a linked list of SetA
structres ready for further consideration and (ii) all rules which are
"strong" cRules with respect to at least one record will be identified.
@param trainingSet the array of arrays of training set records. */
private void findCandWrules(short[][] trainingSet) {
for(int index=0;index
Note that we only have to consider rules marked as cRules (with respect to some other record(s)), other rules can safely be removed from the data set. Proceeds as follows: For each rule between the wRule and the cRule that are CRules with respect to some other record:
Commences with the generation of a global class distribution
array which contains the number of training cases for each class. Next the
"overriden" rule list for each "strong" cRule is considered, if such a
list exists, it is processed. Next the default class is identified and the
total error count calculated.
@param trainingSet the array of arrays of training set records. */
private void processRuleList(short[][] trainingSet) {
// Local fields
int ruleErrors=0;
// Class distribution array
int[] classDistrib = genClassclassCasesCovered(trainingSet);
// Process rule list
RuleNodeCBA linkRef = startCBAruleList;
while(linkRef!=null) {
// Consider "strong" cRules only
if (linkRef.isAstrongCrule) {
// Get index in class records array for consequent of rule
int classIndex = linkRef.consequent[0]-numOneItemSets+
numClasses-1;
// Check that rule correctly satisfies at least one record
// (otherwise ignore).
if (linkRef.classCasesCovered[classIndex] != 0) {
// Process overrides linked list for current "strong" cRule
Overrides orLinkRef = linkRef.replaceList;
while (orLinkRef != null) {
int cIndex = orLinkRef.classLabel-numOneItemSets+
numClasses-1;
// The TID referenced by the current overriding rule
// Is already satisfied by a previous rule decrement
// Current rule distribution array, otherwise
// decrement overriding rule's distribution array
if (coveredByPreviousRule(orLinkRef.tid,linkRef,
trainingSet)) {
linkRef.classCasesCovered[cIndex]--;
}
else orLinkRef.cRule.classCasesCovered[cIndex]--;
orLinkRef = orLinkRef.next;
}
// Determine number of accumulated miss-classifications
// that will result if the given rule were the last rule in
// the classifier (i.e. the default rule)
for (int index=0;index