/* -------------------------------------------------------------------------- */ /* */ /* RULE LIST */ /* */ /* Frans Coenen */ /* */ /* Tuesday 2 March 2004 */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure AssocRuleMining | +-- RuleList */ // Java packages import java.io.*; import java.util.*; // Java GUI packages import javax.swing.*; /** Set of utilities to support various Association Rule Mining (ARM) algorithms included in the LUCS-KDD suite of ARM programs. @author Frans Coenen @version 2 March 2004 */ public class RuleList extends AssocRuleMining { /* ------ FIELDS ------ */ // --- Data structures --- /** Rule node in linked list of rules (either ARs or CRs). */ protected class RuleNode { /** Antecedent of AR. */ protected short[] antecedent; /** Consequent of AR. */ protected short[] consequent; /** The confidence value associate with the rule represented by this node. */ double confidenceForRule=0.0; /** Link to next node */ RuleNode next = null; /** Three argument constructor @param antecedent the antecedent (LHS) of the AR. @param consequent the consequent (RHS) of the AR. @param support the associated confidence value. */ private RuleNode(short[] ante, short[]cons, double confValue) { antecedent = ante; consequent = cons; confidenceForRule = confValue; } } /** Rule node in linked list of rules (either ARs or CRs) for CMAR algorithm. */ protected class RuleNodeCMAR { /** Antecedent of AR. */ protected short[] antecedent; /** Consequent of AR. */ protected short[] consequent; /** The confidence value associate with the rule represented by this node. */ double confidenceForRule=0.0; /** The support value associate with the rule represented by this node. */ double supportForRule=0.0; /** The support value associate with the antecedent of the rule represented by this node. */ double suppAntecedent=0.0; /** The support value associate with the consequent of the rule represented by this node. */ double suppConsequent=0.0; /** Link to next node */ RuleNodeCMAR next = null; /** Six argument constructor @param antecedent the antecedent (LHS) of the AR. @param consequent the consequent (RHS) of the AR. @param suppValue the associated support value. @param suppAnte the associated support for the antecedent. @param suppCons the associated support for the consequent. @param comsvalue the associated confidence value. */ private RuleNodeCMAR(short[] ante, short[]cons, double suppValue, double suppAnte, double suppCons, double confValue) { antecedent = ante; consequent = cons; supportForRule = suppValue; suppAntecedent = suppAnte; suppConsequent = suppCons; confidenceForRule = confValue; } } /** The reference to start of the rule list. */ protected RuleNode startRulelist = null; /** The reference to start of the CMAR rule list. */ protected RuleNodeCMAR startCMARrulelist = null; /** 1-D array for observed values for Chi-Squared Testing. */ private double[] obsValues = new double[4]; /** 1-D array for expected values for Chi-Squared Testing. */ private double[] expValues = new double[4]; // --- Chi-Squared Testing Constants --- /** Critical threshold for 10% "significance" level (assuming "degree of freedom" equivalent to 1). */ private static final double THRESHOLD_10 = 2.7055; /** Critical threshold for 5% "significance" level (assuming "degree of freedom" equivalent to 1). */ private static final double THRESHOLD_5 = 3.8415; /** Critical threshold for 2.5% "significance" level (assuming "degree of freedom" equivalent to 1). */ private static final double THRESHOLD_2HALF = 5.0239; /** Critical threshold for 1% "significance" level (assuming "degree of freedom" equivalent to 1). */ private static final double THRESHOLD_1 = 6.6349; /** Critical threshold for 0.5% "significance" level (assuming "degree of freedom" equivalent to 1). */ private static final double THRESHOLD_HALF = 7.8794; // Cover constant /** Minimum times a record mist be covered */ private static int MIN_COVER=3; // At least 3 rules // --- Chi-Squared Fields --- /** Support value for the antecedent of the rule. */ private double supAntecedent; /** Support value for NOT the antecedent of the rule. */ private double supNotAntecedent; /** Support value for the concequent of the rule. */ private double supConsequent; /** Support value for NOT the concequent of the rule. */ private double supNotConsequent; /** Support for the rule. */ private double supRule; /** Number of records in the input (training) sets. */ private double numRecords; /** Current critical threshold value. */ private double threshold = THRESHOLD_5; // Default /* ------ CONSTRUCTORS ------ */ /** Default constructor to create an instance of the class RuleList */ public RuleList() { } /* ------ METHODS ------ */ /* ---------------------------------------------------------------- */ /* */ /* RULE LINKED LIST ORDERED ACCORDING TO CMAR RANKING */ /* */ /* ---------------------------------------------------------------- */ /* Methods for inserting rules into a linked list of rules ordered according to CMAR ranking. Each rule described in terms of 4 fields: 1) Antecedent (an item set), 2) a consequent (an item set), 3) a total support value and 4) a confidence value (double). */ /* INSERT (ASSOCIATION/CLASSIFICATION) RULE INTO RULE LINKED LIST (ORDERED ACCORDING CONFIDENCE). */ /** Inserts an (association/classification) rule into the linkedlist of rules pointed at by startRulelist.

List is ordered according to "CMAR" ranking. @param antecedent the antecedent (LHS) of the rule. @param consequent the consequent (RHS) of the rule. @param supportForAntecedent the associated support for the antecedent. @param supportForConsequent the associated support for the consequent. @param supportForRule the associated support value. @param confidenceForRule the associated confidence value. */ protected void insertRinRlistCMARranking(short[] antecedent, short[] consequent, double supportForAntecedent, double supportForConsequent, double supportForRule, double confidenceForRule) { // Test rule using Chi-Squared testing if (!testRuleUsingChiSquaredTesting(supportForAntecedent, supportForConsequent,supportForRule,numRows)) return; // Create new node RuleNodeCMAR newNode = new RuleNodeCMAR(antecedent,consequent, supportForRule,supportForAntecedent, supportForConsequent,confidenceForRule); // Empty rule list situation if (startCMARrulelist == null) { startCMARrulelist = newNode; return; } // Check if more general rule with higher ranking exists. if (moreGeneralRuleExists(newNode)) return; // Add new node to start if (ruleIsCMARgreater(newNode,startCMARrulelist)) { newNode.next = startCMARrulelist; startCMARrulelist = newNode; return; } // Add new node to middle RuleNodeCMAR markerNode = startCMARrulelist; RuleNodeCMAR linkRuleNode = startCMARrulelist.next; while (linkRuleNode != null) { if (ruleIsCMARgreater(newNode,linkRuleNode)) { markerNode.next = newNode; newNode.next = linkRuleNode; return; } markerNode = linkRuleNode; linkRuleNode = linkRuleNode.next; } // Add new node to end markerNode.next = newNode; } /* MORE GENERAL EXISTS */ /** Tests whether a more general rule, with higher ranking, already exists in the rule list. @param rule the rule under consideration. @return true if more general rule with higher ranking exists, and false otherwise. */ private boolean moreGeneralRuleExists(RuleNodeCMAR rule) { RuleNodeCMAR linkRef = startCMARrulelist; // Loop through list while (linkRef!=null) { if (ruleIsMoreGeneral(rule,linkRef) && ruleIsCMARgreater2(rule,linkRef)) return(true); linkRef=linkRef.next; } // Default return return(false); } /* RULE IS MORE GENERAL */ /** Compares two rules and returns true if the first is a more general rule than the second (has fewer antecedent attributes). @param rule1 the given rule to be compared to the second. @param rule2 the rule which the given rule1 is to be compared to. @return true id rule1 is greater then rule2, and false otherwise. */ private boolean ruleIsMoreGeneral(RuleNodeCMAR rule1, RuleNodeCMAR rule2) { if (rule1.antecedent.length < rule2.antecedent.length) return(true); // Otherwise return false return(false); } /* RULE IS CMAR GREATER */ /** Compares two rules and returns true if the first is "CMAR greater" (has a higher ranking) than the second.

CMAR ordering is as follows:

  1. Confidence, a rule r1 has priority over a rule r2 if confidence(r1) > confidence(r2).
  2. Support, a rule r1 has priority over a rule r2 if confidence(r1)==confidence(r2) && support(r1)>support(r2) .
  3. Size of antecedent, a rule r1 has priority over a rule r2 if confidence(r1)==confidence(r2) && support(r1)==spoort(r2) &&|Ar1|<|Ar2| .
@param rule1 the given rule to be compared to the second. @param rule2 the rule which the given rule1 is to be compared to. @return true id rule1 is greater then rule2, and false otherwise. */ private boolean ruleIsCMARgreater(RuleNodeCMAR rule1, RuleNodeCMAR rule2) { // Compare confidences if (rule1.confidenceForRule > rule2.confidenceForRule) return(true); // If confidences are the same compare support values if (similar2dec(rule1.confidenceForRule,rule2.confidenceForRule)) { if (rule1.supportForRule > rule2.supportForRule) return(true); // If confidences and supports are the same compare antecedents if (similar2dec(rule1.supportForRule,rule2.supportForRule)) { if (rule1.antecedent.length < rule2.antecedent.length) return(true); } } // Otherwise return false return(false); } /* RULE IS CMAR GREATER 2 */ /** Compares two rules, such that the first id more general than the second, and returns true if the first is "CMAR greater" (has a higher ranking) than the second.

Method similar to ruleIsCMARgreater method but with the "more general rule" prerequisite. CMAR ordering (founded on confidence and support only) is as follows:

  1. Confidence, a rule r1 has priority over a rule r2 if confidence(r1) > confidence(r2).
  2. Support, a rule r1 has priority over a rule r2 if confidence(r1)==confidence(r2) && support(r1)>support(r2) .
@param rule1 the given rule to be compared to the second. @param rule2 the rule which the given rule1 is to be compared to. @return true id rule1 is greater then rule2, and false otherwise. */ private boolean ruleIsCMARgreater2(RuleNodeCMAR rule1, RuleNodeCMAR rule2) { // Compare confidences if (rule1.confidenceForRule > rule2.confidenceForRule) return(true); // If confidences are the same compare support values if (similar2dec(rule1.confidenceForRule,rule2.confidenceForRule)) { if (rule1.supportForRule > rule2.supportForRule) return(true); } // Otherwise return false return(false); } /* -------------------------------------------- */ /* */ /* CHI SQUARED TESTING */ /* */ /* -------------------------------------------- */ /* TEST RULE USING CHI SQUARED TESTING */ /** Tests a classification rule with the given parameters to determine the interestingness/surprisingness of the rule. @param supA the support value for the antecedent of the rule. @param supC the support value for the consequent of the rule. @param supAC the support for the rule. @param numR the number of records in the input (training) sets. @return true if Chi squared value is above critical threshold and false otherwise. */ public boolean testRuleUsingChiSquaredTesting(double supA, double supC, double supAC, double numR) { // Calculate Chi squared value double chiSquaredValue = getChiSquaredValue(supA,supC,supAC,numR); // Test Chi Squared value. if (chiSquaredValue>threshold) return(true); else return(false); } /* GET CHI-SQUARED VALUE */ /** Calculates and returns the Chi-Squared value for a rule. @param supA the support value for the antecedent of the rule. @param supC the support value for the consequent of the rule. @param supAC the support for the rule. @param numR the number of records in the input (training) sets. @return the Chi squared value. */ private double getChiSquaredValue(double supA, double supC, double supAC, double numR) { // Set values supAntecedent = supA; supConsequent = supC; supRule = supAC; numRecords = numR; // Calculate observed and expected values calculateObsValues(); calculateExpValues(); // Calculate and return Chi squared value return(calcChiSquaredValue()); } /* CALCULATE OBSERVED VALUES */ /** Calculates observed values for Chi squared testing calculation. */ private void calculateObsValues() { obsValues[0]=supRule; obsValues[1]=supAntecedent-supRule; obsValues[2]=supConsequent-supRule; obsValues[3]=numRecords-supAntecedent-supConsequent+supRule; // Calculate additional support values supNotAntecedent=numRecords-supAntecedent; supNotConsequent=numRecords-supConsequent; } /* CALIULASTE EXPECTED VALUES */ /** Calculates expected values for Chi squared testing calculation. */ private void calculateExpValues() { expValues[0]=(supConsequent*supAntecedent)/numRecords; expValues[1]=(supNotConsequent*supAntecedent)/numRecords; expValues[2]=(supConsequent*supNotAntecedent)/numRecords; expValues[3]=(supNotConsequent*supNotAntecedent)/numRecords; } /* CALCULATE CHI SQUARED VALUE */ /** Calculates the Chi squared values and returns their sum. @return the sum of the Chi Squared values. */ private double calcChiSquaredValue() { double sumChiSquaredValues = 0.0; for (int index=0;indexMIN_COVER) trainingSet[index]=null; } } // Set rule list startCMARrulelist = newStartRef; } /* EMPTY DATA SET */ /** Tests whether a data set is empty or not (all records set to null). @param dataSet the input data set. @return true if empty, false othjerwise. */ private boolean emptyDataSet(short[][] dataSet) { // Loop through given data set for (int index=0;index Proceed as follows:
  1. Collect rules that satisfy the record. if
    1. If consequents of all rules are all identical, or only one rule, classify record.
    2. Else group rules according to classifier and determine the combined effect of the rules in each group, the classifier associated with the "strongest group" is then selected.
    @param classification the possible classes. @param itemset the record to be classified. @return the class label (or 0 if no class found). */ protected short classifyRecordWCS(short[] itemSet) { RuleNodeCMAR linkRef = startCMARrulelist; RuleNodeCMAR tempRulelist = startCMARrulelist; startCMARrulelist=null; // Obtain rules that satisfy record (iremSet) obtainallRulesForRecord(linkRef,itemSet); // If no rules satisfy record return 0 if (startCMARrulelist==null) { startCMARrulelist = tempRulelist; return(0); } // If only one rule return class if (startCMARrulelist.next== null) { short answer = startCMARrulelist.consequent[0]; startCMARrulelist=tempRulelist; return(answer); } // If more than one rule but all have the same class return calss if (onlyOneClass()) { short answer = startCMARrulelist.consequent[0]; startCMARrulelist=tempRulelist; return(answer); } // Group rules RuleNodeCMAR[] ruleGroups = groupRules(); // Determine Weighted Chi-Squared (WCS) Values for each group double[] wcsValues = calcWCSvalues(ruleGroups); // Select group with best WCS value and return associated label short consequent = selectBestWCS(wcsValues); // Reset global rule list reference startCMARrulelist=tempRulelist; // Return class return(consequent); } /* GROUP RULES */ /** Groups rules contained in a linked list of rules pointed at by startCMARrulelist according to their consequent. @return an array of rule groups. */ private RuleNodeCMAR[] groupRules() { // Initialise rule groups data structure RuleNodeCMAR[] ruleGroups = new RuleNodeCMAR[numClasses]; for (int index=0;indexbestValue) { bestValue=wcsValues[index]; bestIndex=index; } } // Return return((short) (numOneItemSets-bestIndex)); } /* CALCULATE CHI SQUARED VALUE UPPER BOUND */ /** Claculates the upper bound for the Chi-Squared value of a rule. @param suppAnte the support for the antecedent of a rule. @param suppCons the support for the consequent of a rule. @return the Chi-Squared upper bound. */ private double calcChiSquaredUpperBound(double suppAnte, double suppCons) { double term; // Test support for antecedent and confidence and choose minimum if (suppAnte Used in Weighted Chi-Squared classification (CMAR) algorithm. @param linkref The reference to the start of the existing list of rules. @param itemset the record to be classified. */ private void obtainallRulesForRecord(RuleNodeCMAR linkRef, short[] itemSet) { RuleNodeCMAR newStartRef = null; RuleNodeCMAR markerRef = null; // Loop through linked list of existing rules while (linkRef!=null) { // If rule satisfies record add to new rule list if (isSubset(linkRef.antecedent,itemSet)) { RuleNodeCMAR newNode = new RuleNodeCMAR(linkRef.antecedent, linkRef.consequent,linkRef.supportForRule, linkRef.suppAntecedent,linkRef.suppConsequent, linkRef.confidenceForRule); if (newStartRef==null) newStartRef=newNode; else markerRef.next=newNode; markerRef=newNode; /*if (newStartRef==null) newStartRef=linkRef; else markerRef.next=linkRef; markerRef=linkRef; */ } linkRef=linkRef.next; } // Set rule list startCMARrulelist = newStartRef; } /* ----------------------------------- */ /* */ /* GET METHODS */ /* */ /* ----------------------------------- */ /* GET NUMBER OF RULES */ /** Returns the number of generated rules (usually used in conjunction with classification rule mining algorithms rather than ARM algorithms). @return the number of CRs. */ public int getNumCRs() { int number = 0; RuleNode linkRuleNode = startRulelist; // Loop through linked list while (linkRuleNode != null) { number++; linkRuleNode = linkRuleNode.next; } // Return return(number); } /* GET NUMBER OF CMAR CLASSIFICATION RULES */ /** Returns the number of generated CMAR classification rules. @return the number of CRs. */ public int getNumCMAR_CRs() { int number = 0; RuleNodeCMAR linkRuleNode = startCMARrulelist; // Loop through linked list while (linkRuleNode != null) { number++; linkRuleNode = linkRuleNode.next; } // Return return(number); } /* ----------------------------------- */ /* */ /* SET METHODS */ /* */ /* ----------------------------------- */ /* SET NUMBER OF ROWS */ /** Sets number of rows field. */ protected void setNumRows(int numR) { numRows=numR; } /* SET NUMBER OF CLASSES */ /** Sets number of rows field. */ protected void setNumClasses(int numC) { numClasses=numC; } /* SET NUMBER OF ONE ITEM SETS */ /** Sets number of one item sets field. */ protected void setNumOneItemSets(int nois) { numOneItemSets=nois; } /* SET START CMAR RULE LIST TO NULL. */ protected void setStartCMARrulelistToNull() { startCMARrulelist = null; } /* SET DATA ARRAT */ /** Set 2-D "short" data array reference. */ /*protected void setDataArray(short[][] dArray) { dataArray=dArray; } */ /* SET RECONVERSION ARRAYS */ /** Sets the reconversion array reference values. @param conversionArrayRef the reference to the 2-D array used to renumber coulmns for input data in terms of frequency of single attributes (reordering will enhance performance for some ARM and CARM algorithms). @param reconversionArrayRef the reference to the 1-D array used to reconvert input data column numbers to their original numbering where the input data has been ordered to enhance computational efficienvy. */ protected void setReconversionArrayRefs(int[][] conversionArrayRef, short[] reconversionArrayRef) { conversionArray = conversionArrayRef; reconversionArray = reconversionArrayRef; } /* ------------------------------ */ /* */ /* OUTPUT */ /* */ /* ------------------------------ */ /* OUTPUT CMAR RULE LINKED LIST */ /** Outputs contents of CMAR rule linked list (if any) */ public void outputCMARrules() { outputRules(startCMARrulelist); } /* OUTPUT CMAR RULE LINKED LIST WITH RECONVERSION */ /** Outputs contents of CMAR rule linked list (if any) */ public void outputCMARrulesWithReconversion() { outputRulesWithReconversion(startCMARrulelist); } /** Outputs given CMAR rule list. @param ruleList the given rule list. */ public void outputRules(RuleNodeCMAR ruleList) { // Check for empty rule list if (ruleList==null) System.out.println("No rules generated!"); // Loop through rule list int number = 1; RuleNodeCMAR linkRuleNode = ruleList; while (linkRuleNode != null) { System.out.print("(" + number + ") "); outputRule(linkRuleNode); System.out.println(" " + twoDecPlaces(linkRuleNode.confidenceForRule) + "%, (" + linkRuleNode.supportForRule + ", " + linkRuleNode.suppAntecedent + ", " + linkRuleNode.suppConsequent + ")"); number++; linkRuleNode = linkRuleNode.next; } } /** Outputs a CMAR rule. @param rule the rule to be output. */ private void outputRule(RuleNodeCMAR rule) { outputItemSet(rule.antecedent); System.out.print(" -> "); outputItemSet(rule.consequent); } /* OUTPUT RULE LINKED LIST WITH RECONVERSION */ /** Outputs contents of rule linked list (if any) with reconversion. */ public void outputRulesWithReconversion(RuleNodeCMAR ruleList) { // Check for empty rule list if (ruleList==null) System.out.println("No rules generated!"); outputConversionArrays(); // Loop through rule list int number = 1; RuleNodeCMAR linkRuleNode = ruleList; while (linkRuleNode != null) { System.out.print("(" + number + ") "); outputItemSetWithReconversion(linkRuleNode.antecedent); System.out.print(" -> "); outputItemSet(linkRuleNode.consequent); System.out.println(" " + linkRuleNode.confidenceForRule + "%"); number++; linkRuleNode = linkRuleNode.next; } } /* OUTPUT RULE LINKED LIST WITH DEFAULT */ /** Outputs contents of rule linked list (if any), with reconversion, such that last rule is the defualt rule. */ /*public void outputRulesWithDefault() { int number = 1; RuleNode linkRuleNode = startRulelist; while (linkRuleNode != null) { // Output rule number System.out.print("(" + number + ") "); // Output antecedent if (linkRuleNode.next==null) System.out.print("Default -> "); else { outputItemSet(linkRuleNode.antecedent); System.out.print(" -> "); } // Output concequent outputItemSet(linkRuleNode.consequent); System.out.println(" " + linkRuleNode.confidenceForRule + "%"); // Increment parameters number++; linkRuleNode = linkRuleNode.next; } } */ /* OUTPUT RULE LINKED LIST WITH DEFAULT AND RECONVERSION */ /** Outputs contents of rule linked list (if any), with reconversion, such that last rule is the defualt rule. */ /*public void outputRulesWithDefaultRecon() { int number = 1; RuleNode linkRuleNode = startRulelist; while (true) { // Output rule number System.out.print("(" + number + ") "); // Output antecedent if (linkRuleNode.next==null) { System.out.print("Default -> "); break; } else { outputItemSetWithReconversion(linkRuleNode.antecedent); System.out.print(" -> "); } // Output concequent outputItemSetWithReconversion(linkRuleNode.consequent); System.out.println(" " + linkRuleNode.confidenceForRule + "%"); // Increment parameters number++; linkRuleNode = linkRuleNode.next; } } */ /* OUTPUT NUMBER OF RULES */ /** Outputs number of generated rules (ARs or CARS). */ public void outputNumRules() { System.out.println("Number of rules = " + getNumCRs()); } /* OUTPUT NUMBER OF CMAR RULES */ /** Outputs number of generated rules (ARs or CARS). */ public void outputNumCMARrules() { System.out.println("Number of CMAR rules = " + getNumCMAR_CRs()); } }