/* -------------------------------------------------------------------------- */ /* */ /* APRIORI-TFP CLASSIFICATION RULE GENERATION */ /* */ /* Frans Coenen */ /* */ /* Tuesday 29 April 2003 */ /* (Revised Tuesday 21/10/003, 8/1/2004, 12/10/2006) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Class structure AssocRuleMining | +-- TotalSupportTree | +-- PartialSupportTree | +-- AprioriTFPclass | +-- AprioriTFP_CRgen */ //package lucsKDD_ARM; // Java packages import java.util.*; import java.io.*; // Java GUI packages import javax.swing.*; /** Methods to produce classification rules using a APRIORI-TFP approach with either best first or K best first amtching, CSA (Confidence, Support and Antecedent size) ordering, and T-tree X-checking. Alternative ordering and matching strategies are defined in sub-classes of this class. Code assumes that input dataset is organised such that classifiers are at the end of each record. T-tree is constructed in such a way that on then first itteration all 1-item sets and all 2-item sets for the class branches are generted (in a sense the T-tree is "stepped"). On following itterations level N antecedent (non-class) branches are processed and N+1 class branches. Note: number of classifiers value is stored in the numClasses field. Code supports two approaches: 1) AprioriTFP-CR with 50:50 split of training-test set data. 2) AprioriTFP-CR with TCV. @author Frans Coenen @version 12 October 2006 */ public class AprioriTFP_CRgen extends AprioriTFPclass { /* ------ FIELDS ------ */ // None /* ------ CONSTRUCTORS ------ */ /** Constructor with command line arguments to be process. @param args the command line arguments (array of String instances). */ public AprioriTFP_CRgen(String[] args) { super(args); } /** Constructor with argument from existing instance of class AssocRuleMining. 
@param armInstance the given instance of the AssocRuleMining class. */ public AprioriTFP_CRgen(AssocRuleMining armInstance) { super(armInstance); } /** Default constructor. */ public AprioriTFP_CRgen() { } /* ------ METHODS ------ */ /*----------------------------------------------------------------------- */ /* */ /* START CLASSIFICATION BEST FIRST MATCHING */ /* */ /*----------------------------------------------------------------------- */ /* START CLASSIFICATION */ /** Starts classification rule generation process using Apriori-TFP, by building a P-tree. @return The classification accuarcay (%). */ public double startClassification() { String s = "START CLASSIFICATION (BEST FIRST), TFPC WITH X-CHEKING " + "AND CSA ORDERING\n-----------------------" + "----------------------------------------\n"; // proceed return(startClassification(s)); } /** Starts classification rule generation process using, Apriori-TFP, by building a P-tree (GUI version). @param tArea the text area to output data to. @return The classification accuarcay (%). */ public double startClassification(JTextArea tArea) { // Set text area textArea = tArea; // proceed return(startClassification()); } /** Starts classification rule generation process using, Apriori-TFP, by building a P-tree (version with input string argument). @param s String to be output to GUI/Command line interface. @return The classification accuarcay (%). */ protected double startClassification(String s) { if (textArea==null) { System.out.print(s); outputLimits(); } else { textArea.append(s); outputLimits(textArea); } // Create P-tree if (textArea==null) createPtree(); else createPtree(textArea); // Generate CRs using Apriori-TFP return(startClassification2()); } /** Continues classification rule generation proces using Apriori-TFP causing T-tree to be generated form P-tree, and ivoking classification testing.
Method called frequently during hill climbing process. @return The classification accuarcay (%). */ protected double startClassification2() { // Generate T-tree, and generate CRs. startClassification3(); // Output generated rule set if requested if (outputRuleSetToFileFlag) outputRulesToFile(); // Test classification using the test set. return(twoDecPlaces(testClassification())); } /*----------------------------------------------------------------------- */ /* */ /* START CLASSIFICATION BEST K MATCHING */ /* */ /*----------------------------------------------------------------------- */ /* START CLASSIFICATION BEST K */ /** Starts classification rule generation process using (with "best K classification"), Apriori-TFP, by building a P-tree. @return The classification accuarcay (%). */ public double startClassificationBestK() { String s = "START CLASSIFICATION (BEST K), TFPC WITH X-CHEKING " + "AND CSA ORDERING\n-----------------------" + "----------------------------------------\n" + "Best K value = " + kValue + "\n"; // Proceed return(startClassificationBestK(s)); } /** Starts classification rule generation process using (with "best K classification"), Apriori-TFP, by building a P-tree (GUI version). @param tArea the text area to output data to. @return The classification accuarcay (%). */ public double startClassificationBestK(JTextArea tArea) { textArea = tArea; // Proceed return(startClassificationBestK()); } /** Starts classification rule generation process using (with "best K classification"), Apriori-TFP, by building a P-tree (version with input string argument). @param s String to be outpurt to GUI/Command line interface.). @return The classification accuarcay (%). 
*/ protected double startClassificationBestK(String s) { if (textArea==null) { System.out.print(s); outputLimits(); } else { textArea.append(s); outputLimits(textArea); } // Create P-tree if (textArea==null) createPtree(); else createPtree(textArea); // Generate CRs using Apriori-TFP return(startClassificationBestK2()); } /** Continues classification rule generation proces (with "best K classification") using Apriori-TFP.
Method called frequently during hill climbing process. @return The classification accuarcay (%). */ protected double startClassificationBestK2() { // Generate T-tree, and generate CRs. startClassification3(); // Test classification using the test set. return(twoDecPlaces(testClassificationBestK())); } /*----------------------------------------------------------------------- */ /* */ /* START CLASSIFICATION BEST FIRST WITH TCV */ /* */ /*----------------------------------------------------------------------- */ /* COMMEMCE TEN CROSS VALIDATION */ /** Start Ten Cross Validation (TCV) process using Apriori-TFP.
Assumes
that data has been split into 10 equal portions.
@return overall accuracy (%). */
public double commenceTCV() {
double[] parameters = new double[10];
// Loop through tenths data sets
for (int index=0;index<10;index++) {
// Create training and test sets
createTrainingAndTestDataSets(index);
// Mine data, produce T-tree and generate CRs
parameters[index] = startClassification();
}
// Determine overall accuracy
double totalAccu = 0;
for (int index=0;index Overides method
in PartialSupportTree class; the distinction is that this method includes a test
of whether it is possible to generate more CRs after the generation of the first
level. Remember that createTtreeTopLevel creates the second level of
the class branches of the T-tree as well as the entire top level, so we may
already have generated some rules after the first pass of the P-tree. */
protected void contCreateTtree() {
    // First pass of the dataset: build the top level of the T-tree.
    createTtreeTopLevel();

    // Nothing further to build when the check reports that no more CRs
    // can be generated from level 2.
    final int nextLevel = 2;
    if (checkIfNoMoreCRsPossible(nextLevel)) {
        return;
    }

    // More CRs are possible: generate level 2, then hand over to
    // createTtreeLevelN.
    generateLevel2();
    createTtreeLevelN();
}
/* ------------------------------------ */
/* CREATE TOP LEVEL OF T-TREE */
/* ------------------------------------ */
/* CREATE T-TREE TOP LEVEL */
/** Builds level 1 (the top level) of the T-tree. Overrides method in
TotalSupportTree class. The distinction is that this method creates not only
the top level for all attributes but also the second level for the class
attribute branches, after which it adds support counts, prunes and
generates classification rules. */
protected void createTtreeTopLevel() {
    // Allocate the top level of the T-tree; index 0 is left unused
    startTtreeRef = new TtreeNode[numOneItemSets+1];
    for (int item=1; item<=numOneItemSets; item++) {
        startTtreeRef[item] = new TtreeNode();
    }

    // The last numClasses entries are the class attributes: give each one
    // a child array with a node for every lower-numbered index
    final int firstClass = numOneItemSets-numClasses+1;
    for (int cls=firstClass; cls<=numOneItemSets; cls++) {
        TtreeNode[] children = new TtreeNode[cls];
        startTtreeRef[cls].childRef = children;
        for (int child=1; child<cls; child++) {
            children[child] = new TtreeNode();
        }
    }

    // Add support for each 1-itemset and the 2-itemsets in the class
    // attribute branches
    createTtreeTopLevel2();

    // Prune the top level and the level 2 class attribute branches
    pruneTopLevel();

    // Generate Classification Rules (CRs) for level 2
    generateCRs(2);
}
/* GENERATE T-TREE TOP LEVEL 2 */
/** Commences the process of generating the top level (singletons) of the T-tree by
looping through the P-tree table level by level (row by row). The distinction between
this method and the method which it overrides in the PartialSupportTree
class is that this method does not destroy any part of the P-tree table, as
this may be required by further calls, e.g. when using a hill
climbing approach. */
protected void createTtreeTopLevel2() {
numLevelsInTtree = 2;
// Step through Ptree table
for(int index=1;index Identical rule
in AprioriT_CRgen class.
@param level the desired level in the T-tree.
@param itemSetSofar the label for a T-tree node as generated so far (set to
`null' at start).
@param consequent the class for the rules to be generated.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree. */
private void generateCRs(int level, short[] itemSetSofar,
        short[] consequent, int size, TtreeNode[] linkRef) {
    // Any level other than 1 is the "wrong" level: delegate to the
    // wrong-level handler
    if (level != 1) {
        generateCRsWrongLevel(level, itemSetSofar, consequent, size, linkRef);
        return;
    }

    // Level 1 is the "right" level: generate rules from this array of nodes
    generateCRsRightLevel(itemSetSofar, consequent, size, linkRef);
}
/* GENERATE CLASSIFICATION RULES LINKED LIST (RIGHT LEVEL) */
/** Generates classification rules for the given array of T-tree nodes.
Identical rule in AprioriT_CRgen class.
@param itemSetSofar the label for a T-tree node as generated so far (set to
`null' at start).
@param consequent the class for the rules to be generated.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree. */
protected void generateCRsRightLevel(short[] itemSetSofar,
short[] consequent, int size, TtreeNode[] linkRef) {
// Step through level and set to null where CR found above minimum
// confidence threshold
for (int index=1;index Identical rule in AprioriT_CRgen
class.
@param level the desired level in the T-tree.
@param itemSetSofar the label for a T-tree node as generated so far (set to
'null' at start).
@param consequent the class for the rules to be generated.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree. */
private void generateCRsWrongLevel(int level, short[] itemSetSofar,
short[] consequent, int size, TtreeNode[] linkRef) {
// Step through array of T-tree nodes
for (int index=1;index