/* -------------------------------------------------------------------------- */ /* */ /* APRIORI-TFP CBA (CLASSIFICATION BASED ON ASSOCIATIONS) */ /* */ /* Frans Coenen */ /* */ /* Friday 12 March 2004 */ /* (Bug fixes and maintenance: 10/2/2005, 12/10/2006, 5/3/2012) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure AssocRuleMining | +-- TotalSupportTree | +-- PartialSupportTree | +--AprioriTFPclass | +-- AprioriTFP_CARgen | +-- AprioriTFP_CBA */ // Java packages import java.util.*; import java.io.*; // Java GUI packages import javax.swing.*; /** Methods to produce classification rules using Wenmin Li, Jiawei Han and Jian Pei's CBA (Classification based on Multiple associate Rules) algorithm but founded on Apriori-TFP. Assumes that input dataset is orgnised such that classifiers are at the end of each record. Note: number of classifiers value is stored in the numClasses field. @author Frans Coenen @version 12 October 2006 */ public class AprioriTFP_CBA extends AprioriTFP_CARgen { /* ------ FIELDS ------ */ // --- Data structures --- /** Structure to store rules that the current ruke overrrides, ie rules that have lower precedence than the given rule.

Used where the current rule wrongly classifies a record and the alternative lower precedence rule (overriden by the current rule) correctly classifies the same record.*/ private class Overrides { /** A potential cRule that may Overrides an alternative, but lower precedence, cRule. */ RuleNodeCBA cRule; /** The record TID in the training set. */ int tid; /** The associated class label. */ short classLabel; /** Link to next node. */ Overrides next = null; /** Three argument constructor. @param cr the given crule the correctly classifies at least one record. @param tidNumber the TID number foe arecord in the training set. @param cl the class label for the given record. */ private Overrides(RuleNodeCBA cr, int tidNumber, short cl) { cRule = cr; tid = tidNumber; classLabel = cl; } } /** Rule node in linked list of rules for CBA Classification rules. */ private class RuleNodeCBA { /** The Antecedent of the CAR. */ private short[] antecedent; /** The Consequent of the CAR. */ private short[] consequent; /** List of possible alternative rules that can be used to replace the current rules. */ Overrides replaceList = null; /** The confidence value associate with the rule represented by this node. */ private double confidenceForRule=0.0; /** The support value associate with the rule represented by this node. */ private double supportForRule=0.0; /** The (local) dustribution array for the rule describing the number of records covered/satisfied per class (taking into consideration records already covered by previous rules with higher precedence). */ private int[] classCasesCovered = new int[numClasses]; /** A flag to indicate whether the rule is a cRule for at least one record. */ private boolean isAcRule = false; /** A flag to indicate whether the rule Is a strong cRule for at least one record, i.e. is has greater orecedence than the associated wRule. */ private boolean isAstrongCrule = false; /** The default class associated with this rule. */ private short defaultClass = 0; /** The total errors associated with this rule. */ private int totalErrors = 0; /** Link to next node. */ private RuleNodeCBA next = null; /** Four argument constructor @param antecedent the antecedent (LHS) of the AR. @param consequent the consequent (RHS) of the AR. @param consvalue the associated confidence value. @param suppValue the associated support value. */ private RuleNodeCBA(short[] ante, short[]cons, double confValue, double suppValue) { antecedent = ante; consequent = cons; confidenceForRule = confValue; supportForRule = suppValue; // Initialise class records array for (int index=0;indexAssocRuleMining class. */ public AprioriTFP_CBA(AssocRuleMining armInstance) { super(armInstance); } /* ------ METHODS ------ */ /*--------------------------------------------------------------------- */ /* */ /* START CBA CLASSIFICATION */ /* */ /*--------------------------------------------------------------------- */ /* START CBA CLASSIFICATION */ /** Starts CBA classifier generation proces.

Proceeds as follows:

  1. Generate all CARs using Apriori-TFP and place selected CARs into linked list of rules.
  2. Prune list according the cover stratgey.
  3. Test classification using Chi-Squared Weighting approach.
*/ public void startClassification() { String s = "START APRIORI-TFP CBA\n------------------------------\n" + "\nMax number of CARS = " + MAX_NUM_CARS + "\nMax size antecedent = " + MAX_SIZE_OF_ANTECEDENT + "\n"; if (textArea==null) System.out.println(s); else textArea.append(s); // Proceed startClassification2(); } /** Starts CBA classifier generation proces, GUI version. @param tArea the text area to output data to. */ public void startClassification(JTextArea tArea) { // Set text area textArea = tArea; // proceed startClassification2(); } /** Continues process of starting the CMAR classifier generation proces. */ protected void startClassification2() { // Set data structure references to null startSetAlist = null; startCBAruleList = null; // Generate all CARs using Apriori-TFP and place selected CARs into // linked list of rules. startCARgeneration2(); // Prune linked list of rules using CBA "cover" principal pruneUsingCBAapproach(copyItemSet(dataArray)); // Output rule list (uses local method). if (ruleListAttNumOutputFlag || ruleListSchemaOutputFlag) outputCBArules(); // Test classification using the test set. testClassification(); } /*----------------------------------------------------------------------- */ /* */ /* APRIORI-TFP CBA WITH TEN CROSS VALIDATION (TCV) */ /* */ /*----------------------------------------------------------------------- */ /* COMMEMCE TEN CROSS VALIDATION WITH OUTPUT*/ /** Start CBA Ten Cross Validation (TCV) process with output of individual accuracies. */ public void commenceTCVwithOutput() { double[][] parameters = new double[10][5]; String s = "START TCV APRIORI-TFP CBA CLASSIFICATION\n" + "------------------------------\nMax number of CARS = " + MAX_NUM_CARS + "\nMax size antecedent = " + MAX_SIZE_OF_ANTECEDENT + "\n"; if (textArea==null) System.out.println(s); else textArea.append(s); // Loop through tenths data sets for (int index=0;index<10;index++) { s = "[--- " + (index+1) + " ---] "; if (textArea==null) System.out.println(s); else textArea.append(s); // Create training and test sets createTrainingAndTestDataSets(index); // Mine data, produce T-tree and generate CRs startClassification2(); parameters[index][0] = accuracy; parameters[index][1] = aucValue; parameters[index][2] = numFrequentSets; parameters[index][3] = numUpdates; // Final rule set contained in the standrad list of rules not the // CBA rule list defined in this class parameters[index][4] = getNumRules(); } // Output tcvOutput(parameters); } /** Start CBA Ten Cross Validation (TCV) process with output of individual accuracies, GUI version. @param tArea the given instance of the JTextArea class. */ public void commenceTCVwithOutput(JTextArea tArea) { textArea = tArea; // Proceed commenceTCVwithOutput(); } /*----------------------------------------------------------------------- */ /* */ /* CLASSIFICATION ASSOCIATION RULE (CAR) GENERATION */ /* */ /*----------------------------------------------------------------------- */ /* GENERATE CLASSIFICATION ASSOCIATION RULES (RIGHT LEVEL). */ /** Generating classificationh association rules from a given array of T-tree nodes.

For each rule generated add to rule list if: (i) Chi-Squared value is above a specified critical threshold (5% by default), and (ii) the CR tree does not contain a more general rule with a higher ordering. Rule added to rule list according to CBA ranking (ordering). @param itemSetSofar the label for a T-treenode as generated sofar. @param size the length/size of the current array lavel in the T-tree. @param consequent the current consequent (classifier) for the CAR. @param linkRef the reference to the current array lavel in the T-tree. */ protected void generateCARsRightLevel(short[] itemSetSofar, int size, short[] consequent, TtreeNode[] linkRef) { // Loop through T-tree array for (int index=1; index < size; index++) { // Check if node exists if (linkRef[index] != null) { // Generate Antecedent short[] tempItemSet = realloc2(itemSetSofar,(short) index); // Determine confidence double confidenceForCAR = getConfidence(tempItemSet, linkRef[index].support); // Add CAR to linked list structure if confidence greater // than minimum confidence threshold. if (confidenceForCAR >= confidence) { numCarsSoFar++; double suppForConcequent = (double) getSupportForItemSetInTtree(consequent); insertRinRlistCBAranking(tempItemSet,consequent, confidenceForCAR,linkRef[index].support); } } } } /* ---------------------------------------------------------------- */ /* */ /* RULE LINKED LIST ORDERED ACCORDING TO CBA RANKING */ /* */ /* ---------------------------------------------------------------- */ /* Methods for inserting rules into a linked list of rules ordered according to CBA ranking. Each rule described in terms of 4 fields: 1) Antecedent (an item set), 2) a consequent (an item set), 3) a total support value and 4) a confidence value (double). */ /* INSERT ASSOCIATION CLASSIFICATION) RULE INTO RULE LINKED LIST (ORDERED ACCORDING TO CBA RANKING). */ /** Inserts an (association/classification) rule into the linkedlist of rules pointed at by startCBArulelist.

List is ordered according to "CBA" ranking. @param antecedent the antecedent (LHS) of the rule. @param consequent the consequent (RHS) of the rule. @param confidenceForRule the associated confidence value. @param supportForRule the associated support value. */ protected void insertRinRlistCBAranking(short[] antecedent, short[] consequent, double confidenceForRule, double supportForRule) { // Create new node RuleNodeCBA newNode = new RuleNodeCBA(antecedent,consequent, confidenceForRule,supportForRule); // Empty list situation if (startCBAruleList == null) { startCBAruleList = newNode; return; } // Add new node to start if (ruleIsCBAgreater(newNode,startCBAruleList)) { newNode.next = startCBAruleList; startCBAruleList = newNode; return; } // Add new node to middle RuleNodeCBA markerNode = startCBAruleList; RuleNodeCBA linkRuleNode = startCBAruleList.next; while (linkRuleNode != null) { if (ruleIsCBAgreater(newNode,linkRuleNode)) { markerNode.next = newNode; newNode.next = linkRuleNode; return; } markerNode = linkRuleNode; linkRuleNode = linkRuleNode.next; } // Add new node to end markerNode.next = newNode; } /* RULE IS CBA GREATER */ /** Compares two rules and returns true if the first is "CBA greater" (has a higher ranking) than the second.

CBA ordering (same as CMAR) is as follows:

  1. Confidence, a rule r1 has priority over a rule r2 if confidence(r1) > confidence(r2).
  2. Support, a rule r1 has priority over a rule r2 if confidence(r1)==confidence(r2) && support(r1)>support(r2) .
  3. Size of antecedent, a rule r1 has priority over a rule r2 if confidence(r1)==confidence(r2) && support(r1)==spoort(r2) &&|Ar1|<|Ar2| .
@param rule1 the given rule to be compared to the second. @param rule2 the rule which the given rule1 is to be compared to. @return true id rule1 is greater then rule2, and false otherwise. */ private boolean ruleIsCBAgreater(RuleNodeCBA rule1, RuleNodeCBA rule2) { // Compare confidences if (rule1.confidenceForRule > rule2.confidenceForRule) return(true); // If confidences are the same compare support values if (similar2dec(rule1.confidenceForRule,rule2.confidenceForRule)) { if (rule1.supportForRule > rule2.supportForRule) return(true); // If confidences and supports are the same compare antecedents if (similar2dec(rule1.supportForRule,rule2.supportForRule)) { if (rule1.antecedent.length < rule2.antecedent.length) return(true); } } // Otherwise return false return(false); } /* ---------------------------------------------------------------- */ /* */ /* PRUNE CBA RULES */ /* */ /* ---------------------------------------------------------------- */ /* PRUNE CBA RULES */ /** Commences process of generating local distribution arrays and producing rthe final classifier.

Done in four stages:

  1. C AND W RULE IDENTIFICATION: Identify all cRules and wRules in the rule list. Record in the appropriate distribution arrays the number of records covered by strong cRules. Where ever the wRule has higher precedence than the corresponding cRule store the effected training set records in a linked list of SetA structures, A.
  2. CONSIDER WRONGLY CLASSIFIED RECORDS: Process the set A (the list of records that are wrongly classified). If the "offending" wRule can safely be removed from R identify other rules that override (have higher precedence) the cRule corresponding to the wRule. If the offending wRule cannot be removed (because it is a CRule with respect to some other record) then adjust local distribution array for the wRule accordingly.
  3. DETERMINE DEFAULT CLASSES AND TOTAL ERROR COUNTS: Adjust local distribution arrays with respect to overridden rules identified in Stage 2. Then for each "strong" cRule determine the deafault class and the total error count.
  4. GENERATE CLASSIFIER Create classifier.
*/ protected void pruneUsingCBAapproach(short[][] trainingSet) { // Check for empty rule list if (startCBAruleList==null) { System.out.println("NO RULES GENERATED"); return; } // (1) Find C and W rules findCandWrules(trainingSet); // (2) Process wrongly classified records procesSetAlinkedList(trainingSet); // (3) Process rule list processRuleList(trainingSet); // (4) enerate classifier generateClassifier(); } /* -------------------------------------------------------- */ /* STAGE 1: C AND W RULE IDENTIFICATION */ /* -------------------------------------------------------- */ /* FIND C AND W RULES */ /** Processes training set and identifies appropriate cRules and wRules.

Procceds as follows, for each record in the training set:

  1. Identify the first rule (in the ordered list of CBA rules) that correctly classifies the record and the first rule that wrongly classifies the record (these are the corresponding cRule and wRule for the record).
  2. Compare the identified cRule and Wrule:
    1. No cRule, do nothing
    2. cRule exits but no wRule, mark cRule as a "Strong" cRule
    3. cRule and wRule exist, and cRule has greater precedence than wRule, mark as a "Strong" cRule.
    4. cRule and wRule exist, and wRule has greater precedence than cRule, create a SetA structure and add to linked list of SetA structtures.
On completion: (i) all records which are wrongly classified (but for which there is a corresponding cRule) will be collated into a linked list of SetA structres ready for further consideration and (ii) all rules which are "strong" cRules with respect to at least one record will be identified. @param trainingSet the array of arrays of training set records. */ private void findCandWrules(short[][] trainingSet) { for(int index=0;index Proceeds as follows:

  1. If wRule associated with a record is marked as a "cRule", i.e. it correctly classifies at least one other record, adjust classCasesCovered array for wRule and corresponding cRule.
  2. If wRule is not marked, i.e. it is not a "cRule" for any record, determine all the rules after the wRule, that wrongly classify the record upto when the corresponding cRule is reached and place these in a linked list of Overrides structures (these are then all the rules which we would like to remove from the rule list, if possible, so that the record in question will be correctly classified.
@param trainingSet the array of arrays of training set records. */ private void procesSetAlinkedList(short[][] trainingSet) { SetA linkRef = startSetAlist; // Loop through the list of records that have been wrongly classified while (linkRef != null) { // Get class index int index = linkRef.classLabel-numOneItemSets+numClasses-1; // If wRule is a strong cRule for some other record, we cannot get // rid of it, therefore adjust distribution array accordingly. if (linkRef.wRule.isAcRule) { linkRef.cRule.classCasesCovered[index]--; linkRef.wRule.classCasesCovered[index]++; } // Else wRule is not a strong cRule for some other record, // therefore identify all rules that overide the corresponding // cRule for the record. else { allCoverRules(trainingSet[linkRef.tid],linkRef.tid, linkRef.classLabel,linkRef.cRule,linkRef.wRule); } linkRef = linkRef.next; } } /* ALL COVER RULES */ /** Finds all rules with higher precedence than the given cRule that wrongly classify the record, commencing with the given wRule, before the c rule is reached.

Note that we only have to consider rules marked as cRules (with respect to some other record(s)), other rules can safely be removed from the data set. Proceeds as follows: For each rule between the wRule and the cRule that are CRules with respect to some other record:

  1. Add the cRule to the rules overrides list.
  2. Increment the classCasesCovered for the rule.
@param record the current record in the training sert. @param tid the TID number for the current record in the training set. @param classLabel the label for the record. @param the given cRule up to which the rule list will be processed. @param the given cWule from where the rule list will be processed. */ private void allCoverRules(short[] record, int tid, short classLabel, RuleNodeCBA cRule, RuleNodeCBA wRule) { // Determine class index int classIndex = classLabel-numOneItemSets+numClasses-1; // Process wRule list RuleNodeCBA linkRef = wRule.next; while (linkRef != null) { // If reached the given cRule jump out of the loop if (linkRef == cRule) break; // Check if current rule is also a cRule (i.e. satisfies at least // one record in the training set), otherwise ignore if (linkRef.isAcRule) { // Check if identified cRule satisfies current record and if so // add to overides linked list. if (isSubset(linkRef.antecedent,record)) { // Add to replace list for rule identified by linkRef Overrides newOverrides = new Overrides(cRule,tid, classLabel); newOverrides.next = linkRef.replaceList; linkRef.replaceList = newOverrides; // Increment counter linkRef.classCasesCovered[classIndex]++; // Set to true linkRef.isAstrongCrule = true; } } linkRef = linkRef.next; } } /* --------------------------------------------------------------- */ /* STAGE 3: PROCESS RULE LIST */ /* --------------------------------------------------------------- */ /* PROCESS RULE LIST */ /** Creates the final list of CBA rules by processesing the rule list to identify, for each "strong" cRule the default class and the total error count.

Commences with the generation of a global class distribution array which contains the number of training cases for each class. Next the "overriden" rule list for each "strong" cRule is considered, if such a list exists, it is processed. Next the default class is identified and the total error count calculated. @param trainingSet the array of arrays of training set records. */ private void processRuleList(short[][] trainingSet) { // Local fields int ruleErrors=0; // Class distribution array int[] classDistrib = genClassclassCasesCovered(trainingSet); // Process rule list RuleNodeCBA linkRef = startCBAruleList; while(linkRef!=null) { // Consider "strong" cRules only if (linkRef.isAstrongCrule) { // Get index in class records array for consequent of rule int classIndex = linkRef.consequent[0]-numOneItemSets+ numClasses-1; // Check that rule correctly satisfies at least one record // (otherwise ignore). if (linkRef.classCasesCovered[classIndex] != 0) { // Process overrides linked list for current "strong" cRule Overrides orLinkRef = linkRef.replaceList; while (orLinkRef != null) { int cIndex = orLinkRef.classLabel-numOneItemSets+ numClasses-1; // The TID referenced by the current overriding rule // Is already satisfied by a previous rule decrement // Current rule distribution array, otherwise // decrement overriding rule's distribution array if (coveredByPreviousRule(orLinkRef.tid,linkRef, trainingSet)) { linkRef.classCasesCovered[cIndex]--; } else orLinkRef.cRule.classCasesCovered[cIndex]--; orLinkRef = orLinkRef.next; } // Determine number of accumulated miss-classifications // that will result if the given rule were the last rule in // the classifier (i.e. the default rule) for (int index=0;indexclassDistrib[defaultIndex]) defaultIndex=index; } // Add to defualt field for cRule cRule.defaultClass = (short) (defaultIndex+numOneItemSets-numClasses+1); // Calculate errors int errors=0; for (int index=0;index The final classifier comprises all the rules upto and including the identified rule pluss a default rule that produces the default class associated with the identified rule. */ private void generateClassifier() { RuleNodeCBA linkRef = startCBAruleList; // Find lowest total error int error = findLowestTotalError(); // Process rule list while(linkRef!=null) { if (linkRef.isAstrongCrule) { int classIndex = linkRef.consequent[0]-numOneItemSets+ numClasses-1; if (linkRef.classCasesCovered[classIndex] != 0) { insertRuleIntoRulelist(linkRef.antecedent,linkRef.consequent, linkRef.confidenceForRule, linkRef.supportForRule); } } if (linkRef.totalErrors==error) break; linkRef = linkRef.next; } // Add default rule short[] consequent = new short[1]; consequent[0] = linkRef.defaultClass; insertRuleIntoRulelist(null,consequent,0.0,0.0); // // Number Rule in BinTree numberRulesInBinTree(); } /* FIND LOWEST TOTAL ERROR */ /** Finds the first strong c rule (that satisfies at least one record) in the CBA rule list with the lowest total error. @return the minimum total error. */ private int findLowestTotalError() { int error=numRows; // Maximum error, all records miss classified // Process rule list RuleNodeCBA linkRef = startCBAruleList; while(linkRef!=null) { if (linkRef.isAstrongCrule) { int classIndex = linkRef.consequent[0]-numOneItemSets+ numClasses-1; if (linkRef.classCasesCovered[classIndex] != 0) { if (linkRef.totalErrors "); // Consequent if (ruleListSchemaOutputFlag) outputItemSetSchema(rule.consequent); else outputItemSet(rule.consequent); } /* Outputs a single CBA rule antecedent and consequent (GUI version). @param textArea the text area to output data to. @param rule the rule to be output. */ private void outputRule(JTextArea textArea, RuleNodeCBA rule) { // Antecedent if (ruleListSchemaOutputFlag) outputItemSetSchema(textArea,rule.antecedent); else outputItemSet(textArea,rule.antecedent); // Operator textArea.append(" -> "); // Consequent if (ruleListSchemaOutputFlag) outputItemSetSchema(textArea,rule.consequent); else outputItemSet(textArea,rule.consequent); } /* OUTPUT OVERRIDES LINKED LIST */ /** Outputs the given overrides linked list associated with a particular CBA rule. @param listRef the reference to the overrides linked list. */ private void outputOverridesLinkedList(Overrides listRef) { Overrides linkRef = listRef; // Check for empty list if (linkRef== null) { System.out.print(", Overrides linked list: null"); return; } else System.out.print(", Overrides linked list:"); // Loop through list int counter = 0; while (linkRef != null) { if (counter==0) System.out.print(" "); else System.out.print(", "); counter++; outputRule(linkRef.cRule); System.out.println(", TID = " + linkRef.tid + ", classLabel = " + linkRef.classLabel); linkRef = linkRef.next; } } /* OUTPUT OVERRIDES LINKED LIST */ /** Outputs the given overrides linked list associated with a particular CBA rule (GUI versiu on). @param listRef the reference to the overrides linked list. */ private void outputOverridesLinkedList(JTextArea textArea, Overrides listRef) { Overrides linkRef = listRef; // Check for empty list if (linkRef== null) { textArea.append(" Overrides linked list: null"); return; } else textArea.append(" Overrides linked list:"); // Loop through list while (linkRef != null) { textArea.append("\t"); outputRule(textArea,linkRef.cRule); textArea.append(", TID = " + linkRef.tid + ", classLabel = " + linkRef.classLabel); linkRef = linkRef.next; } } /* OUTPUT NUMBER OF CBA RULES */ /** Outputs number of generated rules (ARs or CARS). */ public void outputNumCBArules() { System.out.println("Number of CBA rules = " + getNumCBA_CRs()); } /* ----------------------------------------- */ /* */ /* DIAGNOSTIC OUTPUT */ /* */ /* ----------------------------------------- */ /* OUTPUT SET A LINKED LIST */ /** Outputs the Set A linked list for diagnostic purposes. @param trainingSet the array of arrays of training set records. */ private void outputSetAlinkedList(short[][] trainingSet) { int number = 1; SetA linkRef = startSetAlist; while (linkRef!=null) { System.out.print(number + ". TID = " + linkRef.tid + ", Itemset = "); outputItemSet(trainingSet[linkRef.tid]); System.out.print("\ncRule = "); outputRule(linkRef.cRule); System.out.print("wRule = "); outputRule(linkRef.wRule); linkRef = linkRef.next; number++; } } /* OUTPUT INT ARRAY */ /* Outputs an array of integers. @param theArray the gievn array. */ private void outputIntArray(int[] theArray) { if (theArray==null) { System.out.print("null"); return; } System.out.print("[" + theArray[0]); for(int index=1;index