00001 /** 00002 * @file Histogram.cpp 00003 * 00004 * Implementation of class Histogram. 00005 * 00006 * @author <a href="mailto:juengel@informatik.hu-berlin.de">Matthias Juengel</a> 00007 */ 00008 00009 #include "Histogram.h" 00010 00011 void Histogram::init() 00012 { 00013 init(256); 00014 } 00015 00016 void Histogram::init(int numberOfEntries) 00017 { 00018 this->numberOfEntries = numberOfEntries; 00019 for(int i = 0; i < numberOfEntries; i++) value[i] = 0; 00020 00021 numberOfAddedEntries = 0; 00022 sum = 0; 00023 } 00024 00025 void Histogram::add(int index) 00026 { 00027 if(index >= maxNumberOfEntries) return; 00028 value[index]++; 00029 00030 numberOfAddedEntries++; 00031 sum += index; 00032 } 00033 00034 int Histogram::getValue(int index) 00035 { 00036 if(index >= maxNumberOfEntries) return 0; 00037 return value[index]; 00038 } 00039 00040 int Histogram::getNumberOfEntries() 00041 { 00042 return numberOfEntries; 00043 } 00044 00045 Histogram::HistogramID Histogram::getHistogramID() 00046 { 00047 return histogramID; 00048 } 00049 00050 00051 00052 double Histogram::getAverage() 00053 { 00054 return (double)sum / (double)numberOfAddedEntries; 00055 } 00056 00057 double Histogram::getAverageFrequencyOverAllEntries() 00058 { 00059 return (double)numberOfAddedEntries / (double)numberOfEntries; 00060 } 00061 00062 double Histogram::getAverageFrequencyOverUsedEntries() 00063 { 00064 int numberOfUsedEntries = 0; 00065 for(int i = 0; i < numberOfEntries; i++) if(value[i] != 0) numberOfUsedEntries++; 00066 return (double)numberOfAddedEntries / (double)numberOfUsedEntries; 00067 } 00068 00069 void Histogram::analyseClusters() 00070 { 00071 numberOfClusters = 3; 00072 beginOfCluster[0] = 30; 00073 endOfCluster[0] = 50; 00074 beginOfCluster[1] = 90; 00075 endOfCluster[1] = 150; 00076 beginOfCluster[2] = 220; 00077 endOfCluster[2] = 250; 00078 00079 enum{inCluster, betweenClusters} state = betweenClusters; 00080 00081 numberOfClusters = 0; 00082 int numberOfValuesSinceLastCluster = 0; 00083 // int numberOfValuesInThisCluster = 0; 00084 for(int index = 0; index < numberOfEntries; index++) 00085 { 00086 if(state == betweenClusters) 00087 { 00088 if(value[index] > getAverageFrequencyOverAllEntries()) 00089 /* numberOfValuesInThisCluster++; 00090 else 00091 numberOfValuesInThisCluster = 0; 00092 if(numberOfValuesInThisCluster > 3) 00093 */ 00094 { 00095 state = inCluster; 00096 // beginOfCluster[numberOfClusters] = index + 1 - numberOfValuesInThisCluster; 00097 beginOfCluster[numberOfClusters] = index; 00098 // numberOfValuesInThisCluster = 0; 00099 } 00100 } 00101 else // in cluster 00102 { 00103 if(value[index] <= getAverageFrequencyOverAllEntries()) 00104 numberOfValuesSinceLastCluster++; 00105 else 00106 numberOfValuesSinceLastCluster = 0; 00107 if(numberOfValuesSinceLastCluster > 5) 00108 { 00109 state = betweenClusters; 00110 endOfCluster[numberOfClusters] = index - numberOfValuesSinceLastCluster; 00111 if(endOfCluster[numberOfClusters] - beginOfCluster[numberOfClusters] > 3) numberOfClusters++; 00112 numberOfValuesSinceLastCluster = 0; 00113 } 00114 } 00115 } 00116 if(state == inCluster) 00117 { 00118 endOfCluster[numberOfClusters] = numberOfEntries - 1; 00119 if(endOfCluster[numberOfClusters] - beginOfCluster[numberOfClusters] > 3) numberOfClusters++; 00120 } 00121 } 00122 00123 int Histogram::getNumberOfClusters() 00124 { 00125 return numberOfClusters; 00126 } 00127 00128 int Histogram::getBeginOfCluster(int index) 00129 { 00130 return beginOfCluster[index]; 00131 } 00132 00133 int Histogram::getEndOfCluster(int index) 00134 { 00135 return endOfCluster[index]; 00136 } 00137 00138 In& operator>>(In& stream, Histogram& histogram) 00139 { 00140 stream.read(&histogram,sizeof(Histogram)); 00141 return stream; 00142 } 00143 00144 Out& operator<<(Out& stream, Histogram& histogram) 00145 { 00146 stream.write(&histogram,sizeof(Histogram)); 00147 return stream; 00148 } 00149 00150 00151 /* 00152 * Change log : 00153 * 00154 * $Log: Histogram.cpp,v $ 00155 * Revision 1.1.1.1 2004/05/22 17:37:08 cvsadm 00156 * created new repository GT2004_WM 00157 * 00158 * Revision 1.1 2003/10/07 10:13:24 cvsadm 00159 * Created GT2004 (M.J.) 00160 * 00161 * Revision 1.1 2003/09/26 11:40:40 juengel 00162 * - sorted tools 00163 * - clean-up in DataTypes 00164 * 00165 * Revision 1.3 2003/09/05 18:52:48 dueffert 00166 * warning removed 00167 * 00168 * Revision 1.2 2003/08/29 13:12:12 juengel 00169 * changed parameter in cluster method 00170 * 00171 * Revision 1.1 2003/08/25 17:19:39 juengel 00172 * Added Histogram 00173 * 00174 * 00175 */ 00176