ECCE @ EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
compressor.h
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file compressor.h
1 
7 #include <TTree.h>
8 
9 #include <fstream>
10 #include <map>
11 #include <set>
12 #include <vector>
13 
14 //-----------------------------------------------------------------------------
19  std::vector<UShort_t>* order,
20  std::vector<Float_t>* dict,
21  std::vector<size_t>* cnt,
22  Int_t n_entries,
23  TTree* t,
24  Float_t* gen_,
25  size_t maxNumClusters
26 );
27 //-----------------------------------------------------------------------------
// Returns an index into *loc_set_vec usable for a new cluster: the most
// recently freed index stored in *loc_vec if there is one, otherwise the index
// of a freshly appended empty set.
28 Int_t newLoc(std::vector<Int_t>* loc_vec, std::vector<std::set<Int_t>>* loc_set_vec);
// Erases `min` from the set stored under key `distance`, and drops the key
// itself once its set is empty.
29 void removeDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map);
// Inserts `min` into the set stored under key `distance`, creating the set if
// the key is not present yet.
30 void addDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map);
31 //-----------------------------------------------------------------------------
32 Float_t approx(std::vector<UShort_t>* order, std::vector<Float_t>* dict, std::vector<size_t>* cnt, Int_t n_entries, TTree* t, Float_t* gen_, size_t maxNumClusters)
33 {
34  Float_t maxAbsErrorDoubled = (Float_t) 0;
35 
36  std::map<Float_t, std::pair<Float_t, Int_t>> min_max_loc_map;
37  std::vector<std::set<Int_t>> loc_set_vec;
38  std::vector<Int_t> loc_vec;
39  std::map<Float_t, std::set<Float_t>> distance_min_set_map;
40 
41  for (Int_t j = 0 ; j < n_entries; j++){
42  t->GetEntry(j);
43 
44  std::map<Float_t, std::pair<Float_t, Int_t>>::iterator mmlm = min_max_loc_map.find(*gen_);
45 
46  if (mmlm != min_max_loc_map.end())
47  loc_set_vec[mmlm->second.second].insert(j);
48  else {
49  Int_t loc = newLoc(&loc_vec, &loc_set_vec);
50 
51  loc_set_vec[loc].insert(j);
52 
53  min_max_loc_map[*gen_] = std::pair<Float_t, Int_t>(*gen_, loc);
54 
55  mmlm = min_max_loc_map.find(*gen_);
56  if (mmlm != min_max_loc_map.begin() && *gen_ <= prev(mmlm)->second.first) {
57  loc_set_vec[prev(mmlm)->second.second].insert(j);
58  loc_set_vec[mmlm->second.second].clear();
59  loc_vec.push_back(mmlm->second.second);
60 
61  min_max_loc_map.erase(mmlm);
62 
63  } else if (min_max_loc_map.size() >= 2) {
64  if (mmlm != min_max_loc_map.begin() && mmlm != prev(min_max_loc_map.end())) {
65 
66  removeDiff(next(mmlm)->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
67  }
68 
69  if (mmlm != min_max_loc_map.begin())
70  addDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
71 
72  if (mmlm != prev(min_max_loc_map.end()))
73  addDiff(next(mmlm)->second.first - mmlm->first, mmlm->first, &distance_min_set_map);
74  }
75  }
76 
77  if (min_max_loc_map.size() <= maxNumClusters)
78  continue;
79 
80  std::map<Float_t, std::set<Float_t>>::iterator dmsm = distance_min_set_map.begin();
81  Float_t min = *(dmsm->second.begin());
82 
83  dmsm->second.erase(min);
84  if (dmsm->second.empty())
85  distance_min_set_map.erase(dmsm);
86 
87  mmlm = min_max_loc_map.find(min);
88  if (mmlm != min_max_loc_map.begin())
89  removeDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
90 
91  if (next(mmlm) != prev(min_max_loc_map.end()))
92  removeDiff(next(next(mmlm))->second.first - next(mmlm)->first, next(mmlm)->first, &distance_min_set_map);
93 
94  std::set<Int_t>* s = &(loc_set_vec[next(mmlm)->second.second]);
95  loc_set_vec[mmlm->second.second].insert(s->begin(), s->end());
96  mmlm->second.first = next(mmlm)->second.first;
97  min_max_loc_map.erase(next(mmlm));
98  mmlm = min_max_loc_map.find(min);
99  maxAbsErrorDoubled = std::max(maxAbsErrorDoubled, mmlm->second.first - mmlm->first);
100  if (mmlm != min_max_loc_map.begin())
101  addDiff(mmlm->second.first - prev(mmlm)->first, prev(mmlm)->first, &distance_min_set_map);
102 
103  if (mmlm != prev(min_max_loc_map.end()))
104  addDiff(next(mmlm)->second.first - mmlm->first, mmlm->first, &distance_min_set_map);
105 
106  }
107 
108  Double_t squaredSum = 0;
109  Double_t sum = 0;
110 
111  order->resize(n_entries);
112  for (const auto &mmlm : min_max_loc_map) {
113  Double_t estimate = (Double_t) (mmlm.first + mmlm.second.first) / (Double_t) 2;
114 
115  for (const auto &index : loc_set_vec[mmlm.second.second]) {
116  (*order)[index] = dict->size();
117 
118  t->GetEntry(index);
119  Double_t delta = std::fabs(*gen_ - estimate);
120  squaredSum += (delta * delta);
121  sum += delta;
122  }
123 
124  dict->push_back(estimate);
125  cnt->push_back(loc_set_vec[mmlm.second.second].size());
126  }
127 
128  Double_t avg = sum / (Double_t) n_entries;
129  return sqrt((squaredSum / (Double_t) n_entries) - avg * avg);
130 }
131 
132 Int_t newLoc(std::vector<Int_t>* loc_vec, std::vector<std::set<Int_t>>* loc_set_vec)
133 {
134  if (!loc_vec->empty()) {
135  Int_t loc = loc_vec->back();
136  loc_vec->pop_back();
137  return loc;
138  }
139 
140  Int_t loc = loc_set_vec->size();
141  loc_set_vec->push_back({});
142  return loc;
143 }
144 
145 
146 void removeDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map)
147 {
148  std::map<Float_t, std::set<Float_t>>::iterator dmsm = distance_min_set_map->find(distance);
149  dmsm->second.erase(min);
150 
151  if (dmsm->second.empty())
152  distance_min_set_map->erase(dmsm);
153 }
154 
155 void addDiff(Float_t distance, Float_t min, std::map<Float_t, std::set<Float_t>>* distance_min_set_map)
156 {
157  std::map<Float_t, std::set<Float_t>>::iterator dmsm = distance_min_set_map->find(distance);
158  if (dmsm == distance_min_set_map->end()) {
159  (*distance_min_set_map)[distance] = {min};
160  } else {
161  dmsm->second.insert(min);
162  }
163 }