/* ----------------------------------------------------------------------------------- */
/*                                                                                     */
/*               D A T A   W A R E H O U S E   G E N E R A T O R                       */
/*                                                                                     */
/*                                 Frans Coenen                                        */
/*                                                                                     */
/*                                21 January 2000                                      */
/*                                                                                     */
/* ----------------------------------------------------------------------------------- */

/* Class to generate a binary database from which association rules may be
   generated. */

/* Example test file:

   $ javac generator 10 26

   00100011001111100101000110
   11001001000000010111001010
   00010000001010111001010001
   10110110101110010001001010
   10100000110111001000011000
   01010111101101110101111101
   00010001001010010010010101
   01000011101011110100001111
   00100100010111010010000010
   11001111100111001001011011

   NOTE(review): the command above presumably should read "java", not "javac" --
   confirm. The rows are shown here as 0/1 strings, whereas outputFile2 in this
   file prints the 1-based column numbers of the 1s in each row -- confirm which
   format this example is meant to illustrate. */

import java.util.*;
import java.io.*;

class Generator {

    /*--------------------------------------------------------------------------*/
    /*                                                                          */
    /*                                 FIELDS                                   */
    /*                                                                          */
    /*--------------------------------------------------------------------------*/

    // Inclusive limits applied to the values handed to the constructors.
    final int MAX_COLS = 1024;            // Maximum number of columns
    final int MAX_ROWS = 1000000;         // Maximum number of rows
    final int MAX_DISTRIBUTION = 100;     // Maximum distribution (100%)
    final int MIN_COLS = 1;               // Minimum number of columns
    final int MIN_ROWS = 1;               // Minimum number of rows
    final int MIN_DISTRIBUTION = 1;       // Minimum distribution (1%)

    // Current settings; each keeps the default shown unless a constructor replaces it.
    int numberOfColumns = 16;             // Actual number of columns (default 16)
    int numberOfRows = 10;                // Actual number of rows (default 10)
    int distribution = 50;                // Distribution, in percent (default 50)
    String fileName = "testFile";         // Default name for output file
    Random newRandom = new Random();      // Instance of the class Random.
double[] distributionArray; // Distribution array int[] rowArray; // Row array int attributeCounter = 0; // Number of "1"s generatyed /*--------------------------------------------------------------------------*/ /* */ /* CONSTRUCTORS */ /* */ /*--------------------------------------------------------------------------*/ public Generator() { } public Generator(int columns) { if (checkColumns(columns)) numberOfColumns = columns; } public Generator(int columns, int rows) { if (checkColumns(columns) || checkRows(rows)){ numberOfColumns = columns; numberOfRows = rows; } } public Generator(int columns, int rows, int distrib) { if (checkColumns(columns) || checkRows(rows) || checkDistribution(distrib)){ numberOfColumns = columns; numberOfRows = rows; distribution = distrib; } } public Generator(int columns, int rows, int distrib, String newFileName) { if (checkColumns(columns) || checkRows(rows) || checkDistribution(distrib)){ numberOfColumns = columns; numberOfRows = rows; distribution = distrib; fileName = newFileName; } } /*--------------------------------------------------------------------------*/ /* */ /* METHODS */ /* */ /*--------------------------------------------------------------------------*/ /* OUTPUT File: Output table to file */ public void outputFile() throws java.io.IOException { int density=0; int min = distribution-1; int max = distribution+1; for(int index=0;index<20;index++) { attributeCounter=0; setDistributionArray(); density = outputFile2(); System.out.println("Attributes = " + attributeCounter + ", Density = " + density + "%"); if (density >= min && density <= max) break; } // Output System.out.println("Storage = " + (attributeCounter*2) + " Bytes"); } public int outputFile2() throws java.io.IOException { FileWriter file = new FileWriter(fileName); PrintWriter fileOutput = new PrintWriter(file); int rowCounter = 0, columnCounter = 0; // Nested loop structure to output rows and columns. 
//System.out.println("Distribution = " + distribution); while(rowCounter < numberOfRows) { columnCounter = 0; rowArray = new int[numberOfColumns]; while(columnCounter < numberOfColumns) { rowArray[columnCounter] = getRandom(columnCounter); columnCounter++; } if (checkRow()) { columnCounter = 0; while(columnCounter < numberOfColumns) { if (rowArray[columnCounter] == 1) fileOutput.print((columnCounter+1) + " "); columnCounter++; } fileOutput.println(); rowCounter++; } } // Close the file fileOutput.println("\n"); fileOutput.close(); int density = (attributeCounter*100)/(numberOfColumns*numberOfRows); return(density); } /* SET DISTRIBUTION ARRAY */ private void setDistributionArray(){ distributionArray = new double[numberOfColumns]; int random; for (int index=0;index= MIN_COLS && columns <= MAX_COLS) return(true); else { System.out.println("EROOR: Given number of columns (" + columns + ") not within range of " + MIN_COLS + ".." + MAX_COLS + ", using default value of " + numberOfColumns + " instead."); return(false); } } /* CHECK ROWS: Check that value input for number of rows is within prescribed range. If so return true. Otherwise use default and return false. */ private boolean checkRows(int rows) { if (rows >= MIN_ROWS && rows <= MAX_ROWS) return(true); else { System.out.println("EROOR: Given number of rows (" + rows + ") not within range of " + MIN_ROWS + ".." + MAX_ROWS + ", using default value of " + numberOfRows + " instead."); return(false); } } /* CHECK DISTRIBUTION: Check that value input for distyribution is within prescribed range. If so return true. Otherwise use default and return false. */ private boolean checkDistribution(int distrib) { if (distrib >= MIN_DISTRIBUTION && distrib <= MAX_DISTRIBUTION) return(true); else { System.out.println("EROOR: Given distribution (" + distrib + ") not within range of " + MIN_DISTRIBUTION + ".." 
+ MAX_DISTRIBUTION + ", using default value of " + distribution + " instead."); return(false); } } /* OUTPUT DISTRIBUTION ARRAY */ private void outputDistribArray() { for (int index=0;index