13 static void counting_sort(
unsigned int MAX, std::vector<BinEntryPair5D>&
A) {
14 unsigned int SIZE = A.size();
15 std::vector<BinEntryPair5D>
B(SIZE);
16 std::vector<unsigned int>
C(MAX + 1, 0);
18 for (
unsigned int i = 0; i < SIZE; ++i) {
22 for (
unsigned int i = 1; i <= MAX; ++i) {
26 for (
long i = SIZE - 1; i >= 0; --i) {
27 B[C[A[i].bin] - 1] = A[i];
31 for (
unsigned int i = 0; i < SIZE; ++i) {
36 static void counting_sort(
unsigned int SIZE,
unsigned int MAX,
unsigned int*
A,
37 unsigned int*
B,
unsigned int*
C) {
38 for (
unsigned int i = 0; i < SIZE; ++i) {
41 for (
unsigned int i = 1; i < MAX; ++i) {
44 for (
int i = (SIZE - 1); i >= 0; --i) {
45 B[C[A[i] >> 20] - 1] = A[i];
48 for (
unsigned int i = 0; i < SIZE; ++i) {
54 bins_vec[zoomlevel]->clear();
57 unsigned int n_phi = n_phi_bins[zoomlevel];
58 unsigned int n_d = n_d_bins[zoomlevel];
59 unsigned int n_k = n_k_bins[zoomlevel];
60 unsigned int n_dzdl = n_dzdl_bins[zoomlevel];
61 unsigned int n_z0 = n_z0_bins[zoomlevel];
63 unsigned int total_bins = n_phi * n_d * n_k * n_dzdl * n_z0;
65 float low_phi = zoomranges[zoomlevel].min_phi;
66 float high_phi = zoomranges[zoomlevel].max_phi;
67 float low_d = zoomranges[zoomlevel].min_d;
68 float high_d = zoomranges[zoomlevel].max_d;
69 float low_z0 = zoomranges[zoomlevel].min_d;
70 float high_z0 = zoomranges[zoomlevel].max_d;
71 float low_dzdl = zoomranges[zoomlevel].min_dzdl;
72 float high_dzdl = zoomranges[zoomlevel].max_dzdl;
73 float inv_phi_range = 1. / (high_phi - low_phi);
74 float inv_d_range = 1. / (high_d - low_d);
75 float inv_z0_range = 1. / (high_z0 - low_z0);
76 float inv_dzdl_range = 1. / (high_dzdl - low_dzdl);
77 float k_size = (zoomranges[zoomlevel].max_k - zoomranges[zoomlevel].min_k) /
80 unsigned int pair_counter = 0;
81 vector<vector<SimpleHit3D> > four_pairs;
82 vector<SimpleHit3D> onepair;
83 unsigned int pair_index[4];
85 four_pairs.assign(4, onepair);
86 float x1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
87 float y1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
88 float z1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
89 float x2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
90 float y2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
91 float z2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
92 float min_phi_1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
93 float max_phi_1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
94 float min_phi_2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
95 float max_phi_2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
96 float min_d_1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
97 float max_d_1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
98 float min_d_2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
99 float max_d_2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
100 float min_dzdl_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
101 float max_dzdl_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
102 float min_z0_1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
103 float max_z0_1_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
104 float min_z0_2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
105 float max_z0_2_a[4]
__attribute__((aligned(16))) = {0., 0., 0., 0.};
106 float min_k = zoomranges[zoomlevel].min_k;
107 float max_k = min_k + k_size;
108 float error_scale = 2.;
109 for (
unsigned int k_bin = 0; k_bin < n_k; ++k_bin) {
111 float min_kappa = pow(min_k, pwr);
112 float max_kappa = pow(max_k, pwr);
113 float avg = 0.5 * (max_kappa + min_kappa);
114 float width = 0.5 * (max_kappa - min_kappa);
115 max_kappa = avg + width * bin_scale;
116 min_kappa = avg - width * bin_scale;
118 float min_k_a[4]
__attribute__((aligned(16))) = {min_kappa, min_kappa,
119 min_kappa, min_kappa};
120 float max_k_a[4]
__attribute__((aligned(16))) = {max_kappa, max_kappa,
121 max_kappa, max_kappa};
124 for (
unsigned int i = 0; i < pairs_vec[zoomlevel]->size(); i++) {
125 pair_index[pair_counter] = i;
126 four_pairs[pair_counter][0] =
127 ((*(hits_vec[zoomlevel]))[(*(pairs_vec[zoomlevel]))[i].first]);
128 four_pairs[pair_counter][1] =
129 ((*(hits_vec[zoomlevel]))[(*(pairs_vec[zoomlevel]))[i].second]);
130 x1_a[pair_counter] = four_pairs[pair_counter][0].get_x();
131 y1_a[pair_counter] = four_pairs[pair_counter][0].get_y();
132 z1_a[pair_counter] = four_pairs[pair_counter][0].get_z();
133 x2_a[pair_counter] = four_pairs[pair_counter][1].get_x();
134 y2_a[pair_counter] = four_pairs[pair_counter][1].get_y();
135 z2_a[pair_counter] = four_pairs[pair_counter][1].get_z();
137 if (pair_counter == 4) {
139 x1_a, x2_a, y1_a, y2_a, z1_a, z2_a, min_k_a, max_k_a, min_phi_1_a,
140 max_phi_1_a, min_phi_2_a, max_phi_2_a, min_d_1_a, max_d_1_a,
141 min_d_2_a, max_d_2_a, min_dzdl_a, max_dzdl_a, min_z0_1_a,
142 max_z0_1_a, min_z0_2_a, max_z0_2_a);
143 for (
unsigned int h = 0;
h < pair_counter; ++
h) {
145 float dr0 = sqrt((2.0*sqrt(four_pairs[
h][0].get_size(0,0))) *
146 (2.0*sqrt(four_pairs[
h][0].get_size(0,0))) +
147 (2.0*sqrt(four_pairs[
h][0].get_size(1,1))) *
148 (2.0*sqrt(four_pairs[
h][0].get_size(1,1))));
150 float r0 = sqrt(four_pairs[
h][0].get_x() * four_pairs[
h][0].get_x() +
151 four_pairs[
h][0].get_y() * four_pairs[
h][0].get_y());
153 float dr1 = sqrt((2.0*sqrt(four_pairs[
h][1].get_size(0,0))) *
154 (2.0*sqrt(four_pairs[
h][1].get_size(0,0))) +
155 (2.0*sqrt(four_pairs[
h][1].get_size(1,1))) *
156 (2.0*sqrt(four_pairs[
h][1].get_size(1,1))));
158 float r1 = sqrt(four_pairs[
h][1].get_x() * four_pairs[
h][1].get_x() +
159 four_pairs[
h][1].get_y() * four_pairs[
h][1].get_y());
160 float r1r0_inv = 1. / (r1 - r0);
163 float dphi = (dr0 + dr1) * r1r0_inv;
165 float phi_scatt = phiError(
166 four_pairs[
h][1], min_kappa, max_kappa,
167 zoomranges[zoomlevel].min_d, zoomranges[zoomlevel].max_d,
168 zoomranges[zoomlevel].min_z0, zoomranges[zoomlevel].max_z0,
169 zoomranges[zoomlevel].min_dzdl, zoomranges[zoomlevel].max_dzdl,
173 min_phi_1_a[
h] -= dphi;
174 min_phi_2_a[
h] -= dphi;
175 max_phi_1_a[
h] += dphi;
176 max_phi_2_a[
h] += dphi;
179 float dd = r0 * (dr0 + dr1) * r1r0_inv + dr0;
180 dd += phi_scatt *
r1;
188 float ddzdl = ((2.0*sqrt(four_pairs[h][0].get_size(2,2))) +
189 (2.0*sqrt(four_pairs[h][1].get_size(2,2)))) * r1r0_inv;
191 float dzdl_scatt = dzdlError(
192 four_pairs[h][1], min_kappa, max_kappa,
193 zoomranges[zoomlevel].min_d, zoomranges[zoomlevel].max_d,
194 zoomranges[zoomlevel].min_z0, zoomranges[zoomlevel].max_z0,
195 zoomranges[zoomlevel].min_dzdl, zoomranges[zoomlevel].max_dzdl,
198 ddzdl *= error_scale;
199 min_dzdl_a[
h] -= ddzdl;
200 max_dzdl_a[
h] += ddzdl;
204 r0 * ((2.0*sqrt(four_pairs[h][0].get_size(2,2))) +
205 (2.0*sqrt(four_pairs[h][1].get_size(2,2)))) * r1r0_inv;
206 dz0 += dzdl_scatt *
r1;
208 min_z0_1_a[
h] -= dz0;
209 min_z0_2_a[
h] -= dz0;
210 max_z0_1_a[
h] += dz0;
211 max_z0_2_a[
h] += dz0;
213 if (separate_by_helicity ==
false) {
214 fillBins(total_bins, pair_counter, pair_index, min_phi_1_a,
215 max_phi_1_a, min_d_1_a, max_d_1_a, min_dzdl_a, max_dzdl_a,
216 min_z0_1_a, max_z0_1_a, four_pairs, n_d, n_k, n_dzdl, n_z0,
217 k_bin, n_phi, zoomlevel, low_phi, high_phi, low_d, high_d,
218 low_z0, high_z0, low_dzdl, high_dzdl, inv_phi_range,
219 inv_d_range, inv_z0_range, inv_dzdl_range, vote_array);
220 fillBins(total_bins, pair_counter, pair_index, min_phi_2_a,
221 max_phi_2_a, min_d_2_a, max_d_2_a, min_dzdl_a, max_dzdl_a,
222 min_z0_2_a, max_z0_2_a, four_pairs, n_d, n_k, n_dzdl, n_z0,
223 k_bin, n_phi, zoomlevel, low_phi, high_phi, low_d, high_d,
224 low_z0, high_z0, low_dzdl, high_dzdl, inv_phi_range,
225 inv_d_range, inv_z0_range, inv_dzdl_range, vote_array);
227 if (helicity ==
false) {
228 fillBins(total_bins, pair_counter, pair_index, min_phi_1_a,
229 max_phi_1_a, min_d_1_a, max_d_1_a, min_dzdl_a, max_dzdl_a,
230 min_z0_1_a, max_z0_1_a, four_pairs, n_d, n_k, n_dzdl, n_z0,
231 k_bin, n_phi, zoomlevel, low_phi, high_phi, low_d, high_d,
232 low_z0, high_z0, low_dzdl, high_dzdl, inv_phi_range,
233 inv_d_range, inv_z0_range, inv_dzdl_range, vote_array);
235 fillBins(total_bins, pair_counter, pair_index, min_phi_2_a,
236 max_phi_2_a, min_d_2_a, max_d_2_a, min_dzdl_a, max_dzdl_a,
237 min_z0_2_a, max_z0_2_a, four_pairs, n_d, n_k, n_dzdl, n_z0,
238 k_bin, n_phi, zoomlevel, low_phi, high_phi, low_d, high_d,
239 low_z0, high_z0, low_dzdl, high_dzdl, inv_phi_range,
240 inv_d_range, inv_z0_range, inv_dzdl_range, vote_array);
246 if (pair_counter != 0) {
248 x1_a, x2_a, y1_a, y2_a, z1_a, z2_a, min_k_a, max_k_a, min_phi_1_a,
249 max_phi_1_a, min_phi_2_a, max_phi_2_a, min_d_1_a, max_d_1_a,
250 min_d_2_a, max_d_2_a, min_dzdl_a, max_dzdl_a, min_z0_1_a, max_z0_1_a,
251 min_z0_2_a, max_z0_2_a);
252 for (
unsigned int h = 0;
h < pair_counter; ++
h) {
254 float dr0 = sqrt((2.0*sqrt(four_pairs[
h][0].get_size(0,0))) *
255 (2.0*sqrt(four_pairs[
h][0].get_size(0,0))) +
256 (2.0*sqrt(four_pairs[
h][0].get_size(1,1))) *
257 (2.0*sqrt(four_pairs[
h][0].get_size(1,1))));
259 float r0 = sqrt(four_pairs[
h][0].get_x() * four_pairs[
h][0].get_x() +
260 four_pairs[
h][0].get_y() * four_pairs[
h][0].get_y());
262 float dr1 = sqrt((2.0*sqrt(four_pairs[
h][1].get_size(0,0))) *
263 (2.0*sqrt(four_pairs[
h][1].get_size(0,0))) +
264 (2.0*sqrt(four_pairs[
h][1].get_size(1,1))) *
265 (2.0*sqrt(four_pairs[
h][1].get_size(1,1))));
267 float r1 = sqrt(four_pairs[
h][1].get_x() * four_pairs[
h][1].get_x() +
268 four_pairs[
h][1].get_y() * four_pairs[
h][1].get_y());
269 float r1r0_inv = 1. / (r1 - r0);
272 float dphi = (dr0 + dr1) * r1r0_inv;
274 float phi_scatt = phiError(
275 four_pairs[
h][1], min_kappa, max_kappa, zoomranges[zoomlevel].min_d,
276 zoomranges[zoomlevel].max_d, zoomranges[zoomlevel].min_z0,
277 zoomranges[zoomlevel].max_z0, zoomranges[zoomlevel].min_dzdl,
278 zoomranges[zoomlevel].max_dzdl,
true);
281 min_phi_1_a[
h] -= dphi;
282 min_phi_2_a[
h] -= dphi;
283 max_phi_1_a[
h] += dphi;
284 max_phi_2_a[
h] += dphi;
287 float dd = r0 * (dr0 + dr1) * r1r0_inv + dr0;
288 dd += phi_scatt *
r1;
296 float ddzdl = ((2.0*sqrt(four_pairs[h][0].get_size(2,2))) +
297 (2.0*sqrt(four_pairs[h][1].get_size(2,2)))) * r1r0_inv;
299 float dzdl_scatt = dzdlError(
300 four_pairs[h][1], min_kappa, max_kappa, zoomranges[zoomlevel].min_d,
301 zoomranges[zoomlevel].max_d, zoomranges[zoomlevel].min_z0,
302 zoomranges[zoomlevel].max_z0, zoomranges[zoomlevel].min_dzdl,
303 zoomranges[zoomlevel].max_dzdl,
true);
305 ddzdl *= error_scale;
306 min_dzdl_a[
h] -= ddzdl;
307 max_dzdl_a[
h] += ddzdl;
310 float dz0 = r0 * ((2.0*sqrt(four_pairs[h][0].get_size(2,2))) +
311 (2.0*sqrt(four_pairs[h][1].get_size(2,2)))) * r1r0_inv;
312 dz0 += dzdl_scatt *
r1;
314 min_z0_1_a[
h] -= dz0;
315 min_z0_2_a[
h] -= dz0;
316 max_z0_1_a[
h] += dz0;
317 max_z0_2_a[
h] += dz0;
319 if (separate_by_helicity ==
false) {
320 fillBins(total_bins, pair_counter, pair_index, min_phi_1_a, max_phi_1_a,
321 min_d_1_a, max_d_1_a, min_dzdl_a, max_dzdl_a, min_z0_1_a,
322 max_z0_1_a, four_pairs, n_d, n_k, n_dzdl, n_z0, k_bin, n_phi,
323 zoomlevel, low_phi, high_phi, low_d, high_d, low_z0, high_z0,
324 low_dzdl, high_dzdl, inv_phi_range, inv_d_range, inv_z0_range,
325 inv_dzdl_range, vote_array);
326 fillBins(total_bins, pair_counter, pair_index, min_phi_2_a, max_phi_2_a,
327 min_d_2_a, max_d_2_a, min_dzdl_a, max_dzdl_a, min_z0_2_a,
328 max_z0_2_a, four_pairs, n_d, n_k, n_dzdl, n_z0, k_bin, n_phi,
329 zoomlevel, low_phi, high_phi, low_d, high_d, low_z0, high_z0,
330 low_dzdl, high_dzdl, inv_phi_range, inv_d_range, inv_z0_range,
331 inv_dzdl_range, vote_array);
333 if (helicity ==
false) {
334 fillBins(total_bins, pair_counter, pair_index, min_phi_1_a,
335 max_phi_1_a, min_d_1_a, max_d_1_a, min_dzdl_a, max_dzdl_a,
336 min_z0_1_a, max_z0_1_a, four_pairs, n_d, n_k, n_dzdl, n_z0,
337 k_bin, n_phi, zoomlevel, low_phi, high_phi, low_d, high_d,
338 low_z0, high_z0, low_dzdl, high_dzdl, inv_phi_range,
339 inv_d_range, inv_z0_range, inv_dzdl_range, vote_array);
341 fillBins(total_bins, pair_counter, pair_index, min_phi_2_a,
342 max_phi_2_a, min_d_2_a, max_d_2_a, min_dzdl_a, max_dzdl_a,
343 min_z0_2_a, max_z0_2_a, four_pairs, n_d, n_k, n_dzdl, n_z0,
344 k_bin, n_phi, zoomlevel, low_phi, high_phi, low_d, high_d,
345 low_z0, high_z0, low_dzdl, high_dzdl, inv_phi_range,
346 inv_d_range, inv_z0_range, inv_dzdl_range, vote_array);
354 if (vote_array.
size == 0) {
357 if (vote_array.
size < (1 << 14)) {
358 unsigned int B[1 << 14];
359 unsigned int C[(1 << 12) + 1] = {0};
362 for (
unsigned int i = 0; i < vote_array.
size; ++i) {
363 (*(bins_vec[zoomlevel]))[i].bin = (vote_array.
arr[i]) >> 20;
364 (*(bins_vec[zoomlevel]))[i].entry =
365 (vote_array.
arr[i]) & ((
unsigned int)1048575);
366 (*(bins_vec[zoomlevel]))[i].is_seed =
false;
370 for (
unsigned int i = 0; i < vote_array.
size; ++i) {
371 (*(bins_vec[zoomlevel]))[i].bin = (vote_array[i]) >> 20;
372 (*(bins_vec[zoomlevel]))[i].entry =
373 (vote_array[i]) & ((
unsigned int)1048575);
374 (*(bins_vec[zoomlevel]))[i].is_seed =
false;
376 if (bins_vec[zoomlevel]->size() > total_bins) {
379 sort(bins_vec[zoomlevel]->begin(), bins_vec[zoomlevel]->end());
385 unsigned int ,
unsigned int pair_counter,
386 unsigned int* pair_index,
float* min_phi,
float* max_phi,
float* min_d,
387 float* max_d,
float* min_dzdl,
float* max_dzdl,
float* min_z0,
388 float* max_z0, vector<vector<SimpleHit3D> >& ,
unsigned int n_d,
389 unsigned int n_k,
unsigned int n_dzdl,
unsigned int n_z0,
390 unsigned int k_bin,
unsigned int n_phi,
unsigned int ,
391 float low_phi,
float high_phi,
float low_d,
float high_d,
float low_z0,
392 float high_z0,
float low_dzdl,
float high_dzdl,
float inv_phi_range,
393 float inv_d_range,
float inv_z0_range,
float inv_dzdl_range,
395 unsigned int buffer[(1 << 10)];
396 unsigned int bufnum = 0;
398 for (
unsigned int i = 0; i < pair_counter; ++i) {
399 if ((max_d[i] < low_d) || (min_d[i] > high_d)) {
402 if ((max_z0[i] < low_z0) || (min_z0[i] > high_z0)) {
405 if ((max_dzdl[i] < low_dzdl) || (min_dzdl[i] > high_dzdl)) {
409 unsigned int index = pair_index[i];
411 unsigned int low_d_bin = 0;
412 unsigned int high_d_bin = (n_d - 1);
413 unsigned int low_z0_bin = 0;
414 unsigned int high_z0_bin = (n_z0 - 1);
415 unsigned int low_dzdl_bin = 0;
416 unsigned int high_dzdl_bin = (n_dzdl - 1);
417 if (min_d[i] > low_d) {
419 (
unsigned int)(((min_d[i] - low_d) * inv_d_range) * ((
float)(n_d)));
421 if (max_d[i] < high_d) {
423 (
unsigned int)(((max_d[i] - low_d) * inv_d_range) * ((
float)(n_d)));
425 if (min_z0[i] > low_z0) {
426 low_z0_bin = (
unsigned int)(((min_z0[i] - low_z0) * inv_z0_range) *
429 if (max_z0[i] < high_z0) {
430 high_z0_bin = (
unsigned int)(((max_z0[i] - low_z0) * inv_z0_range) *
433 if (min_dzdl[i] > low_dzdl) {
435 (
unsigned int)(((min_dzdl[i] - low_dzdl) * inv_dzdl_range) *
438 if (max_dzdl[i] < high_dzdl) {
440 (
unsigned int)(((max_dzdl[i] - low_dzdl) * inv_dzdl_range) *
444 if (min_phi[i] >= 0.) {
445 if ((max_phi[i] < low_phi) || (min_phi[i] > high_phi)) {
448 unsigned int low_phi_bin = 0;
449 unsigned int high_phi_bin = (n_phi - 1);
450 if (min_phi[i] > low_phi) {
451 low_phi_bin = (
unsigned int)(((min_phi[i] - low_phi) * inv_phi_range) *
454 if (max_phi[i] < high_phi) {
455 high_phi_bin = (
unsigned int)(((max_phi[i] - low_phi) * inv_phi_range) *
458 for (
unsigned int phib = low_phi_bin; phib <= high_phi_bin; ++phib) {
459 for (
unsigned int db = low_d_bin;
db <= high_d_bin; ++
db) {
460 for (
unsigned int z0b = low_z0_bin; z0b <= high_z0_bin; ++z0b) {
461 for (
unsigned int dzdlb = low_dzdl_bin; dzdlb <= high_dzdl_bin;
464 n_d, n_k, n_dzdl, n_z0, phib,
db, k_bin, dzdlb, z0b);
465 buffer[bufnum] = ((bin << 20) ^ index);