ECCE @ EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
compressor_generator.h
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file compressor_generator.h
1 
7 //-----------------------------------------------------------------------------
8 #include <map>
9 #include <set>
10 #include <utility>
11 #include <random>
12 #include <vector>
13 
14 #include "RtypesCore.h"
15 //-----------------------------------------------------------------------------
16 using namespace std;
17 
18 UShort_t residesIn(Float_t raw, vector<Float_t>* dict) {
19  for (size_t i = 0; i < dict->size(); ++i) {
20  if (raw <= dict->at(i)) {
21  if (i == 0)
22  return 0;
23  else if ((dict->at(i) - raw) < (raw - dict->at(i-1)))
24  return i;
25  else
26  return i - 1;
27  }
28  }
29  return dict->size() - 1;
30 }
31 //-----------------------------------------------------------------------------
36  vector<UShort_t>* order,
37  vector<Float_t>* dict,
38  vector<size_t>* cnt,
39  Int_t n_entries,
40  std::default_random_engine& generator,
41  std::normal_distribution<double>& distribution,
42  size_t maxNumClusters
43 );
44 //-----------------------------------------------------------------------------
// Returns a slot index into *loc_vec_vec: the last index stored in *loc_vec
// (which is popped) when *loc_vec is non-empty, otherwise the index of a new
// empty vector appended to *loc_vec_vec.
45 Int_t newLoc(vector<Int_t>* loc_vec, vector<vector<Int_t>>* loc_vec_vec);
// Erases min from the set stored under key distance and drops the key once
// its set becomes empty.
46 void removeDiff(Float_t distance, Float_t min, map<Float_t, set<Float_t>>* distance_min_set_map);
// Inserts min into the set stored under key distance, creating the set when
// the key is not present yet.
47 void addDiff(Float_t distance, Float_t min, map<Float_t, set<Float_t>>* distance_min_set_map);
48 //-----------------------------------------------------------------------------
49 Float_t approx(vector<UShort_t>* order, vector<Float_t>* dict, vector<size_t>* cnt, Int_t n_entries, std::default_random_engine& generator, std::normal_distribution<double>& distribution, size_t maxNumClusters)
50 {
51  Float_t maxAbsErrorDoubled = (Float_t) 0;
52 
53  map<Float_t, pair<Float_t, Int_t>> min_max_loc_map;
54  vector<vector<Int_t>> loc_vec_vec;
55  vector<Int_t> loc_vec;
56  map<Float_t, set<Float_t>> distance_min_set_map;
57 
58  for (Int_t j = 0 ; j < n_entries; j++){
59  Float_t number = distribution(generator);
60  Float_t* gen_ = &number;
61 
62  map<Float_t, pair<Float_t, Int_t>>::iterator mmlm = min_max_loc_map.find(*gen_);
63 
64  if (mmlm != min_max_loc_map.end())
65  loc_vec_vec[mmlm->second.second].push_back(j);
66  else {
67  Int_t loc = newLoc(&loc_vec, &loc_vec_vec);
68 
69  loc_vec_vec[loc].push_back(j);
70 
71  min_max_loc_map[*gen_] = pair<Float_t, Int_t>(*gen_, loc);
72 
73  mmlm = min_max_loc_map.find(*gen_);
74  if (mmlm != min_max_loc_map.begin() && *gen_ <= prev(mmlm)->second.first) {
75  loc_vec_vec[prev(mmlm)->second.second].push_back(j);
76  loc_vec_vec[mmlm->second.second].clear();
77  loc_vec.push_back(mmlm->second.second);
78 
79  min_max_loc_map.erase(mmlm);
80 
81  } else if (min_max_loc_map.size() >= 2) {
82  if (mmlm != min_max_loc_map.begin() && mmlm != prev(min_max_loc_map.end())) {
83 
84  removeDiff(next(mmlm)->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
85  }
86 
87  if (mmlm != min_max_loc_map.begin())
88  addDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
89 
90  if (mmlm != prev(min_max_loc_map.end()))
91  addDiff(next(mmlm)->second.first - mmlm->first, mmlm->first, &distance_min_set_map);
92  }
93  }
94 
95  if (min_max_loc_map.size() <= maxNumClusters)
96  continue;
97 
98  map<Float_t, set<Float_t>>::iterator dmsm = distance_min_set_map.begin();
99  Float_t min = *(dmsm->second.begin());
100 
101  dmsm->second.erase(min);
102  if (dmsm->second.empty())
103  distance_min_set_map.erase(dmsm);
104 
105  mmlm = min_max_loc_map.find(min);
106  if (mmlm != min_max_loc_map.begin())
107  removeDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
108 
109  if (next(mmlm) != prev(min_max_loc_map.end()))
110  removeDiff(next(next(mmlm))->second.first - next(mmlm)->first, next(mmlm)->first, &distance_min_set_map);
111 
112  vector<Int_t>* s = &(loc_vec_vec[next(mmlm)->second.second]);
113  loc_vec_vec[mmlm->second.second].insert(loc_vec_vec[mmlm->second.second].end(),s->begin(), s->end());
114  mmlm->second.first = next(mmlm)->second.first;
115  min_max_loc_map.erase(next(mmlm));
116  mmlm = min_max_loc_map.find(min);
117  maxAbsErrorDoubled = max(maxAbsErrorDoubled, mmlm->second.first - mmlm->first);
118  if (mmlm != min_max_loc_map.begin())
119  addDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
120 
121  if (mmlm != prev(min_max_loc_map.end()))
122  addDiff(next(mmlm)->second.first - mmlm->first, mmlm->first, &distance_min_set_map);
123 
124  }
125 
126  order->resize(n_entries);
127  for (const auto &mmlm : min_max_loc_map) {
128  Double_t estimate = (Double_t) (mmlm.first + mmlm.second.first) / (Double_t) 2;
129 
130  for (const auto &index : loc_vec_vec[mmlm.second.second]) {
131  (*order)[index] = dict->size();
132  }
133 
134  dict->push_back(estimate);
135  cnt->push_back(loc_vec_vec[mmlm.second.second].size());
136  }
137 
138  return maxAbsErrorDoubled / (double) 2; //sqrt((squaredSum / (Double_t) n_entries) - avg * avg);
139 }
140 
141 Int_t newLoc(vector<Int_t>* loc_vec, vector<vector<Int_t>>* loc_vec_vec)
142 {
143  if (!loc_vec->empty()) {
144  Int_t loc = loc_vec->back();
145  loc_vec->pop_back();
146  return loc;
147  }
148 
149  Int_t loc = loc_vec_vec->size();
150  loc_vec_vec->push_back({});
151  return loc;
152 }
153 
154 
155 void removeDiff(Float_t distance, Float_t min, map<Float_t, set<Float_t>>* distance_min_set_map)
156 {
157  map<Float_t, set<Float_t>>::iterator dmsm = distance_min_set_map->find(distance);
158  dmsm->second.erase(min);
159 
160  if (dmsm->second.empty())
161  distance_min_set_map->erase(dmsm);
162 }
163 
164 void addDiff(Float_t distance, Float_t min, map<Float_t, set<Float_t>>* distance_min_set_map)
165 {
166  map<Float_t, set<Float_t>>::iterator dmsm = distance_min_set_map->find(distance);
167  if (dmsm == distance_min_set_map->end()) {
168  (*distance_min_set_map)[distance] = {min};
169  } else {
170  dmsm->second.insert(min);
171  }
172 }