#include <xmmintrin.h>

using namespace Eigen;
using namespace SeamStress;

      : hit1(h1), hit2(h2), hit3(h3), track(t), chi2(c) {}

    return (hit1 < other.hit1) ||
           ((hit2 < other.hit2) && (hit1 == other.hit1)) ||
           ((hit3 < other.hit3) && (hit1 == other.hit1) &&
            (hit2 == other.hit2));

    return ((hit1 == other.hit1) && (hit2 == other.hit2) &&
            (hit3 == other.hit3));

  unsigned int hit1, hit2, hit3, track;
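// Ghost rejection (next block): every candidate track contributes all of its
// (hit1, hit2, hit3) combinations as hitTriplets; after sorting, identical
// triplets shared by several candidates are resolved in favor of the longer
// track (or, at equal length, the one reaching the outermost layer, then the
// lower chi2), and the losers are switched off in `usetrack` with the
// runner-up chi2 recorded in `next_best_chi2`.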
                                      vector<SimpleTrack3D>&,
                                      vector<bool>& usetrack,
                                      vector<float>& next_best_chi2) {
  vector<hitTriplet> trips;
  for (unsigned int i = 0; i < input.size(); ++i) {
    for (unsigned int h1 = 0; h1 < input[i].hits.size(); ++h1) {
      for (unsigned int h2 = (h1 + 1); h2 < input[i].hits.size(); ++h2) {
        for (unsigned int h3 = (h2 + 1); h3 < input[i].hits.size(); ++h3) {
          if (cut_on_dca == false) {
                input[i].hits[h3].get_id(), i, track_states[i].chi2));
  if (trips.size() == 0) {
  sort(trips.begin(), trips.end());
  while (pos < trips.size()) {
    unsigned int next_pos = pos + 1;
    if (next_pos >= trips.size()) {
    while (trips[pos] == trips[next_pos]) {
      if (next_pos >= trips.size()) {
    if ((next_pos - pos) > 1) {
      float best_chi2 = trips[pos].chi2;
      float next_chi2 = trips[pos + 1].chi2;
      unsigned int best_pos = pos;
      for (unsigned int i = (pos + 1); i < next_pos; ++i) {
            input[trips[best_pos].track].hits.size()) {
        } else if ((input[trips[i].track].hits.size() >
                    input[trips[best_pos].track].hits.size()) ||
                   (input[trips[i].track].hits.back().get_layer() >
                    input[trips[best_pos].track].hits.back().get_layer())) {
          next_chi2 = best_chi2;
          best_chi2 = trips[i].chi2;
        if ((trips[i].chi2 < best_chi2) ||
            (usetrack[trips[best_pos].track] == false)) {
          next_chi2 = best_chi2;
          best_chi2 = trips[i].chi2;
        } else if (trips[i].chi2 < next_chi2) {
          next_chi2 = trips[i].chi2;
      for (unsigned int i = pos; i < next_pos; ++i) {
          usetrack[trips[i].track] = false;
          next_best_chi2[trips[i].track] = next_chi2;
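// Constructor (single-threaded): forwards the Hough-space binning and helix
// ranges to the HelixHough base class, sets default chi2 / ghost-rejection
// cuts, and apparently converts the per-layer material budget (`material`)
// into scattering and material tables used by the bin-error estimates below.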
                               unsigned int n_k, unsigned int n_dzdl,
                               vector<float>& radius, float Bfield)
    : HelixHough(n_phi, n_d, n_k, n_dzdl, n_z0, min_resolution, max_resolution,
      fast_chi2_cut_par0(12.),
      fast_chi2_cut_par1(0.),
      chi2_removal_cut(1.),
      reject_ghosts(false),
      require_pixels(false) {
  vector<float> detector_material;
  for (unsigned int i = 0; i < radius.size(); ++i) {
  for (unsigned int i = 0; i < material.size(); ++i) {
        sqrt(3. * material[i]));
    detector_material.push_back(3. * material[i]);
  float total_scatter_2 = 0.;
  vector<SimpleHit3D> one_layer;
  for (unsigned int i = 0; i < 4; ++i) {
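// Threaded variant of the constructor: same cut and material setup as above,
// plus a SeamStress thread pool (vss/vssp) with `num_threads` workers and,
// for each thread, what appears to be its own tracker instance built from the
// zoom profile with the top level stripped off (zoom_profile_new starts at
// index 1).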
                               vector<vector<unsigned int> >& zoom_profile,
                               unsigned int minzoom,
                               float Bfield, bool parallel,
                               unsigned int num_threads)
      fast_chi2_cut_par0(12.),
      fast_chi2_cut_par1(0.),
      chi2_removal_cut(1.),
      reject_ghosts(false),
      nthreads(num_threads),
      is_parallel(parallel),
      require_pixels(false) {
  vector<float> detector_material;
  for (unsigned int i = 0; i < radius.size(); ++i) {
  for (unsigned int i = 0; i < material.size(); ++i) {
        sqrt(3. * material[i]));
    detector_material.push_back(3. * material[i]);
  float total_scatter_2 = 0.;
  vector<SimpleHit3D> one_layer;
  for (unsigned int i = 0; i < 4; ++i) {
  Seamstress::init_vector(num_threads, vss);
  vssp = new vector<Seamstress*>();
  for (unsigned int i = 0; i < vss.size(); i++) {
  vector<vector<unsigned int> > zoom_profile_new;
  for (unsigned int i = 1; i < zoom_profile.size(); ++i) {
    zoom_profile_new.push_back(zoom_profile[i]);
  for (unsigned int i = 0; i < nthreads; ++i) {
        zoom_profile, minzoom, range, material, radius, Bfield));
  for (unsigned int i = 0; i < vss.size(); i++) {
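// finalize(): copies accepted candidates to `output`. When ghost rejection is
// enabled, the usetrack / next_best_chi2 flags (presumably filled by
// tripletRejection above) select which inputs survive; each surviving track is
// then refit, hits with a per-hit chi2 above 10 are dropped except on the two
// innermost layers, and the back-smoothing pass temporarily appends a vertex
// hit at the origin before restoring the hit list. Timing and fit counters
// (nfits, findtracksiter, CAtime, KALtime) are printed at the end.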
                              vector<SimpleTrack3D>& output) {
  for (unsigned int i = 0; i < input.size(); ++i) {
    output.push_back(input[i]);
  unsigned int nt = input.size();
  vector<bool> usetrack;
  usetrack.assign(input.size(), true);
  vector<float> next_best_chi2;
  next_best_chi2.assign(input.size(), 99999.);
  vector<HelixKalmanState> states_new;
  for (unsigned int i = 0; i < nt; ++i) {
    if (usetrack[i] == true) {
      output.push_back(input[i]);
      output.back().index = (output.size() - 1);
    for (unsigned int i = 0; i < output.size(); ++i) {
      vector<SimpleHit3D> temp_hits;
      vector<float> chi2_hit;
      for (unsigned int i = 0; i < chi2_hit.size(); ++i) {
        if (chi2_hit[i] < 10. || temp_track.hits[i].get_layer() < 2) {
          temp_hits.push_back(temp_track.hits[i]);
      temp_track.hits = temp_hits;
      for (int h = (temp_track.hits.size() - 1); h >= 0; --h) {
      vertex_hit.set_x(0.0);
      vertex_hit.set_y(0.0);
      vertex_hit.set_z(0.0);
      temp_track.hits.push_back(vertex_hit);
      temp_track.hits.pop_back();
      if (output[i].phi < 0.) {
        output[i].phi += 2. * M_PI;
  cout << "# fits = " << nfits << endl;
  cout << "findTracks called " << findtracksiter << " times" << endl;
  cout << "CAtime = " << CAtime << endl;
  cout << "KALtime = " << KALtime << endl;
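// Layer bookkeeping for a hit collection: four 32-bit masks cover up to 128
// layers, and __builtin_popcount of the masks gives the number of distinct
// layers hit; the check on bits 0 and 1 of layer_mask[0] appears to enforce
// the require_pixels option (both innermost layers must be present).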
  unsigned int layer_mask[4] = {0, 0, 0, 0};
  for (unsigned int i = 0; i < hits.size(); ++i) {
    if (hits[i].get_layer() < 32) {
      layer_mask[0] = layer_mask[0] | (1 << hits[i].get_layer());
    } else if (hits[i].get_layer() < 64) {
      layer_mask[1] = layer_mask[1] | (1 << (hits[i].get_layer() - 32));
    } else if (hits[i].get_layer() < 96) {
      layer_mask[2] = layer_mask[2] | (1 << (hits[i].get_layer() - 64));
    } else if (hits[i].get_layer() < 128) {
      layer_mask[3] = layer_mask[3] | (1 << (hits[i].get_layer() - 96));
  unsigned int nlayers =
      __builtin_popcount(layer_mask[0]) + __builtin_popcount(layer_mask[1]) +
      __builtin_popcount(layer_mask[2]) + __builtin_popcount(layer_mask[3]);
  if (((layer_mask[0] & 1) == 0) || ((layer_mask[0] & 2) == 0)) {
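// Hough bin-error terms (phiError / dzdlError below): the worst-case
// multiple-scattering angle is accumulated in quadrature over the traversed
// layers and multiplied by the inverse momentum implied by the largest
// curvature in the bin; when that scattering angle exceeds the purely
// geometric angle subtended by the bin's d (or z0) extent, the difference is
// returned so the bin is effectively widened.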
                             float min_d, float max_d, float,
                             float, float, float max_dzdl,
    prev_p_inv = 3.33333333333333314e+02 * max_k * Bfield_inv *
                 sqrt(1. - max_dzdl * max_dzdl);
  float total_scatter_2 = 0.;
    total_scatter_2 += this_scatter * this_scatter;
  float angle = p_inv * sqrt(total_scatter_2) * 1.0;
  float dsize = 0.5 * (max_d - min_d);
  float returnval = 0.;
  if (pairvoting == false) {
    if (angle_from_d > angle) {
      returnval = (angle - angle_from_d);

                              float, float, float min_z0,
                              float max_z0, float, float max_dzdl,
    prev_p_inv = 3.33333333333333314e+02 * max_k * Bfield_inv *
                 sqrt(1. - max_dzdl * max_dzdl);
  float total_scatter_2 = 0.;
    total_scatter_2 += this_scatter * this_scatter;
  float angle = p_inv * sqrt(total_scatter_2) * 1.0;
  float z0size = 0.5 * (max_z0 - min_z0);
  float returnval = 0.;
  if (pairvoting == false) {
    if (angle_from_z0 > angle) {
      returnval = (angle - angle_from_z0);

                             vector<SimpleTrack3D>& tracks,
  cout << "findTracks " << endl;
  vector<float> chi2_hit;
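// fitTrack(): weighted linear least-squares helix fit using Eigen. The x-y
// circle is linearized (x^2 + y^2 expressed as a linear function of x, y and a
// constant), each row scaled by the inverse transverse hit error, and solved
// with an LDL^T decomposition; the center is (beta(0)/2, beta(1)/2), the
// radius follows from the constant term, and d, phi follow from the center
// and radius. The longitudinal fit appears to be a straight line in (s, z),
// where the arc length s uses asin when 0.5*k*D is large and a small-angle
// series otherwise. Per-hit residuals are accumulated into chi2_hit and the
// reduced chi2 for 2*nhits - 5 degrees of freedom is returned.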
                              vector<float>& chi2_hit,
  vector<float> xyres_inv;
  vector<float> zres_inv;
  for (unsigned int i = 0; i < track.hits.size(); i++) {
    float ex = (2.0 * sqrt(track.hits[i].get_size(0, 0))) * scale;
    float ey = (2.0 * sqrt(track.hits[i].get_size(1, 1))) * scale;
    float ez = (2.0 * sqrt(track.hits[i].get_size(2, 2))) * scale;
    if (track.hits[i].get_layer() < 0) {
    xyres.push_back(sqrt(ex * ex + ey * ey));
    xyres_inv.push_back(1. / xyres.back());
    zres.push_back(ez);
    zres_inv.push_back(1. / zres.back());
  chi2_hit.resize(track.hits.size(), 0.);

  MatrixXf y = MatrixXf::Zero(track.hits.size(), 1);
  for (unsigned int i = 0; i < track.hits.size(); i++) {
    y(i, 0) = (pow(track.hits[i].get_x(), 2) + pow(track.hits[i].get_y(), 2));
    y(i, 0) *= xyres_inv[i];
  MatrixXf X = MatrixXf::Zero(track.hits.size(), 3);
  for (unsigned int i = 0; i < track.hits.size(); i++) {
    X(i, 0) = track.hits[i].get_x();
    X(i, 1) = track.hits[i].get_y();
    X(i, 0) *= xyres_inv[i];
    X(i, 1) *= xyres_inv[i];
    X(i, 2) *= xyres_inv[i];
  MatrixXf Xt = X.transpose();
  MatrixXf prod = Xt * X;
  MatrixXf Xty = Xt * y;
  MatrixXf beta = prod.ldlt().solve(Xty);

  float cx = beta(0, 0) * 0.5;
  float cy = beta(1, 0) * 0.5;
  float r = sqrt(cx * cx + cy * cy - beta(2, 0));
  float phi = atan2(cy, cx);
  float d = sqrt(cx * cx + cy * cy) - r;
  MatrixXf diff = y - (X * beta);
  MatrixXf chi2 = (diff.transpose()) * diff;
  float dx = d * cos(phi);
  float dy = d * sin(phi);

  MatrixXf y2 = MatrixXf::Zero(track.hits.size(), 1);
  for (unsigned int i = 0; i < track.hits.size(); i++) {
    y2(i, 0) = track.hits[i].get_z();
    y2(i, 0) *= zres_inv[i];
  MatrixXf X2 = MatrixXf::Zero(track.hits.size(), 2);
  for (unsigned int i = 0; i < track.hits.size(); i++) {
    float D = sqrt(pow(dx - track.hits[i].get_x(), 2) +
                   pow(dy - track.hits[i].get_y(), 2));
    if (0.5 * k * D > 0.1) {
      float v = 0.5 * k * D;
      s = 2. * asin(v) / k;
      float temp1 = k * D * 0.5;
      float temp2 = D * 0.5;
      s += (3. / 20.) * temp2;
      s += (5. / 56.) * temp2;
    X2(i, 0) *= zres_inv[i];
    X2(i, 1) *= zres_inv[i];
  MatrixXf Xt2 = X2.transpose();
  MatrixXf prod2 = Xt2 * X2;
  MatrixXf Xty2 = Xt2 * y2;
  MatrixXf beta2 = prod2.ldlt().solve(Xty2);
  MatrixXf diff2 = y2 - (X2 * beta2);
  MatrixXf chi2_z = (diff2.transpose()) * diff2;
  float z0 = beta2(1, 0);
  float dzdl = beta2(0, 0) / sqrt(1. + beta2(0, 0) * beta2(0, 0));

  if (track.kappa != 0.) {
    r = 1. / track.kappa;
  cx = (track.d + r) * cos(track.phi);
  cy = (track.d + r) * sin(track.phi);
  for (unsigned int h = 0; h < track.hits.size(); h++) {
    float dx1 = track.hits[h].get_x() - cx;
    float dy1 = track.hits[h].get_y() - cy;
    float dx2 = track.hits[h].get_x() + cx;
    float dy2 = track.hits[h].get_y() + cy;
    float xydiff1 = sqrt(dx1 * dx1 + dy1 * dy1) - r;
    float xydiff2 = sqrt(dx2 * dx2 + dy2 * dy2) - r;
    float xydiff = xydiff2;
    if (fabs(xydiff1) < fabs(xydiff2)) {
    float ls_xy = xyres[h];
    chi2_hit[h] += xydiff * xydiff / (ls_xy * ls_xy);
    chi2_hit[h] += diff2(h, 0) * diff2(h, 0);
    chi2_tot += chi2_hit[h];
  unsigned int deg_of_freedom = 2 * track.hits.size() - 5;
  return (chi2_tot) / ((float)(deg_of_freedom));
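// Helper used after an initial fit: refit the candidate, copy into `track`
// only those hits whose per-hit chi2 is below `cut`, and reject the candidate
// when fewer than three hits survive; `all_good` records whether every hit
// passed, which apparently lets the caller stop iterating.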
                                unsigned int min_hits,
  vector<float> chi2_hit;
  vector<float> temp_hits;
  bool all_good = true;
  for (unsigned int h = 0; h < temp_track.hits.size(); h += 1) {
    if (chi2_hit[h] < cut) {
      track.hits.push_back(temp_track.hits[h]);
  if (track.hits.size() < 3) {
  if (all_good == true) {
                                    float&, int iter = 0,
                                    float tempscale = 1.0, float scale = 1.0) {
  vector<float> chi2_hit;
  float r = 1. / track.kappa;
  float cx = (track.d + r) * cos(track.phi);
  float cy = (track.d + r) * sin(track.phi);
  float phi = atan2(cy, cx);
  float d = sqrt(cx * cx + cy * cy) - r;
  float dx = d * cos(phi);
  float dy = d * sin(phi);
  best_ind.assign(layer_indexes.size(), -1);
  best_chi2.assign(layer_indexes.size(), -1.);
  for (unsigned int i = 0; i < temp_track.hits.size(); i += 1) {
    float dx1 = temp_track.hits[i].get_x() - cx;
    float dy1 = temp_track.hits[i].get_y() - cy;
    float dx2 = temp_track.hits[i].get_x() + cx;
    float dy2 = temp_track.hits[i].get_y() + cy;
    float xydiff1 = sqrt(dx1 * dx1 + dy1 * dy1) - r;
    float xydiff2 = sqrt(dx2 * dx2 + dy2 * dy2) - r;
    float xydiff = xydiff2;
    if (fabs(xydiff1) < fabs(xydiff2)) {
    float tex = (2.0 * sqrt(temp_track.hits[i].get_size(0, 0))) * tempscale;
    float tey = (2.0 * sqrt(temp_track.hits[i].get_size(1, 1))) * tempscale;
    float tez = (2.0 * sqrt(temp_track.hits[i].get_size(2, 2))) * tempscale;
    float dr2 = tex * tex + tey * tey;
    float chi2_xy = xydiff * xydiff / dr2;
    float D = sqrt(pow(dx - temp_track.hits[i].get_x(), 2) +
                   pow(dy - temp_track.hits[i].get_y(), 2));
    if (0.5 * k * D > 0.1) {
      float v = 0.5 * k * D;
      s = 2. * asin(v) / k;
      float temp1 = k * D * 0.5;
      float temp2 = D * 0.5;
      s += (3. / 20.) * temp2;
      s += (5. / 56.) * temp2;
    float diff = temp_track.hits[i].get_z() - x2beta;
        diff * diff / (tez * tez);
    if ((best_chi2[temp_track.hits[i].get_layer()] < -0.) ||
        (best_chi2[temp_track.hits[i].get_layer()] > chi2_xy)) {
      best_chi2[temp_track.hits[i].get_layer()] = chi2_xy;
      best_ind[temp_track.hits[i].get_layer()] = i;
  for (unsigned int i = 0; i < layer_indexes.size(); i += 1) {
    if (best_ind[i] < 0) {
    track.hits.push_back(temp_track.hits[best_ind[i]]);
  if (track.hits.size() < 8) {
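// fit_all(): seed a candidate from the innermost (layer < 3) hits, then call
// fit_all_update() up to 20 times, tightening the hit-error scale each pass
// (tempscale starts at scale1; scale2 = sqrt(sqrt(0.5)) looks like the
// per-iteration shrink factor), and finish with an unscaled pass; success
// requires chi2 < 10 and hits on more than half of the layers.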
  float scale2 = sqrt(sqrt(0.5));
  vector<float> chi2_hit;
  for (unsigned int i = 0; i < hits.size(); i += 1) {
    if (hits[i].get_layer() < 3) {
      temp_track.hits.push_back(hits[i]);
  vector<int> best_ind;
  vector<float> best_chi2;
  for (unsigned int i = 0; i < hits.size(); i += 1) {
    if (hits[i].get_layer() < 3) {
      if (layer_indexes[hits[i].get_layer()].size() != 2) {
        track.hits.push_back(hits[i]);
  float tempscale = scale1;
  for (int i = 0; i < 20; ++i) {
    if (fit_all_update(layer_indexes, temp_track, best_ind, best_chi2, track,
                       chi2, i, tempscale, tempscale) == false) {
  if (fit_all_update(layer_indexes, temp_track, best_ind, best_chi2, track,
                     chi2, 1.0, 1.0) == false) {
  if ((chi2 < 10.0) && (track.hits.size() > ((layer_indexes.size() * 1) / 2))) {
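// First track-finding pass: hits are indexed by layer (with a negative
// sentinel for empty layers), fit_all() attempts a single global fit, and on
// success a Kalman state is seeded from the fitted helix with a small
// diagonal covariance. Hits are then re-attached back-to-front, keeping those
// whose chi2 increment stays below 4 (and below 10 in a looser second sweep),
// and what appears to be the best combination of inner (pixel) hits with a
// chi2 increment under 25 is prepended before the track is stored and its
// hits marked used. Wall-clock time for this stage is accumulated into
// CAtime.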
                                    vector<SimpleHit3D>& hits, vector<SimpleTrack3D>& tracks,
  gettimeofday(&t1, NULL);
  vector<vector<int> > layer_indexes;
  layer_indexes.assign(n_layers, vector<int>());
  for (unsigned int i = 0; i < hits.size(); ++i) {
    layer_indexes[hits[i].get_layer()].push_back(i);
    layer_indexes[i].push_back(-(i + 1));
  float fit_all_chi2 = -1.;
  bool fit_all_success =
      fit_all(hits, layer_indexes, fit_all_track, fit_all_chi2);
  if (fit_all_success == true) {
    state.phi = fit_all_track.phi;
    if (state.phi < 0.) {
    state.d = fit_all_track.d;
    state.z0 = fit_all_track.z0;
    state.C = Matrix<float, 5, 5>::Zero(5, 5);
    state.C(0, 0) = pow(0.01, 2.);
    state.C(1, 1) = pow(0.01, 2.);
    state.C(2, 2) = pow(0.01 * state.nu, 2.);
    state.C(3, 3) = pow(0.01, 2.);
    state.C(4, 4) = pow(0.01, 2.);
    vector<SimpleHit3D> temp_hits;
    for (int h = ((int)(fit_all_track.hits.size() - 1)); h >= 0; h -= 1) {
      if ((temp_state.chi2 - state.chi2) < 4.) {
        temp_hits.push_back(fit_all_track.hits[h]);
    for (int h = ((int)(fit_all_track.hits.size() - 1)); h >= 0; h -= 1) {
      if ((temp_state.chi2 - state.chi2) < 10.) {
        temp_hits.push_back(fit_all_track.hits[h]);
    for (int h = ((int)(temp_hits.size() - 1)); h >= 0; h -= 1) {
      temp_track.hits.push_back(temp_hits[h]);
    vector<vector<int> > inner_combos;
    for (int l = 0; l < 2; ++l) {
      for (unsigned int i0 = 0; i0 < layer_indexes[0].size(); ++i0) {
        for (unsigned int i1 = 0; i1 < layer_indexes[1].size(); ++i1) {
          for (unsigned int i2 = 0; i2 < layer_indexes[2].size(); ++i2) {
            inner_combos.push_back(vector<int>(3, 0));
            inner_combos.back()[0] = layer_indexes[0][i0];
            inner_combos.back()[1] = layer_indexes[1][i1];
            inner_combos.back()[2] = layer_indexes[2][i2];
    float best_chi2_p[4] = {-1., -1., -1., -1.};
    int best_ind_p[4] = {-1, -1, -1, -1};
    for (unsigned int c = 0; c < inner_combos.size(); ++c) {
      for (int l = 2; l >= 0; l -= 1) {
        if (inner_combos[c][l] >= 0) {
      float chi2 = temp_state.chi2 - state.chi2;
      if ((best_chi2_p[n_p] == -1) || (chi2 < best_chi2_p[n_p])) {
        best_chi2_p[n_p] = chi2;
        best_ind_p[n_p] = c;
    for (int n = 3; n >= 0; n -= 1) {
      if ((best_chi2_p[n] > 0.) && (best_chi2_p[n] < 25.)) {
    vector<SimpleHit3D> inner_hits;
    for (int l = 2; l >= 0; l -= 1) {
      int c = best_ind_p[best_np];
      if (inner_combos[c][l] >= 0) {
        inner_hits.push_back(hits[inner_combos[c][l]]);
    for (int i = (((int)(inner_hits.size())) - 1); i >= 0; i -= 1) {
      new_hits.push_back(inner_hits[i]);
    for (unsigned int i = 0; i < temp_track.hits.size(); ++i) {
      new_hits.push_back(temp_track.hits[i]);
        (temp_track.hits.size() > (n_layers / 2)) && best_np > 0) {
      tracks.push_back(temp_track);
      for (unsigned int i = 0; i < tracks.back().hits.size(); ++i) {
        (*hit_used)[tracks.back().hits[i].get_id()] = true;
      gettimeofday(&t2, NULL);
      time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
      time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
      CAtime += (time2 - time1);
    gettimeofday(&t2, NULL);
    time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
    time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
    CAtime += (time2 - time1);
  gettimeofday(&t2, NULL);
  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
  CAtime += (time2 - time1);
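// Second track-finding pass (cellular-automaton style, below): three-hit seed
// segments are formed from the first three layers in SIMD batches of four,
// scored on the consistency of the two dzdl estimates, and then extended one
// layer at a time. Each extension is scored by the chi2-returning overload of
// calculateKappaTangents() and kept only if the chi2 per added layer stays
// below easy_chi2_cut and no more than `allowed_missing` layers are skipped.
// Finished segments are converted to tracks, refit with fitTrack(), filtered
// by Kalman chi2, a NaN-kappa check and a DCA cut, and their hits marked
// used. Stage timings go into CAtime and (presumably) KALtime.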
                                         vector<SimpleTrack3D>& tracks,
  vector<TrackSegment>* cur_seg = &segments1;
  vector<TrackSegment>* next_seg = &segments2;
  unsigned int curseg_size = 0;
  unsigned int nextseg_size = 0;
  vector<TrackSegment> complete_segments;
  for (unsigned int l = 0; l < n_layers; ++l) {
  for (unsigned int i = 0; i < hits.size(); ++i) {
    unsigned int min = (hits[i].get_layer() - allowed_missing);
    if (allowed_missing > (unsigned int) hits[i].get_layer()) {
    for (int l = min; l <= hits[i].get_layer(); l += 1) {
  for (unsigned int l = 0; l < n_layers; ++l) {
  gettimeofday(&t1, NULL);
  float cosang_diff_inv = 1. / cosang_diff;
  vector<float> inv_layer;
  inv_layer.assign(n_layers, 1.);
  for (unsigned int l = 3; l < n_layers; ++l) {
    inv_layer[l] = 1. / (((float)l) - 2.);
  unsigned int hit_counter = 0;
  unsigned int hit1[4];
  unsigned int hit2[4];
  unsigned int hit3[4];
  temp_segment.hits.assign(n_layers, 0);
  for (unsigned int i = 0, sizei = layer_sorted[0].size(); i < sizei; ++i) {
    for (unsigned int j = 0, sizej = layer_sorted[1].size(); j < sizej; ++j) {
      for (unsigned int k = 0, sizek = layer_sorted[2].size(); k < sizek; ++k) {
        dx1_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[0][i].get_size(0, 0));
        dy1_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[0][i].get_size(1, 1));
        dz1_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[0][i].get_size(2, 2));
        dx2_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[1][j].get_size(0, 0));
        dy2_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[1][j].get_size(1, 1));
        dz2_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[1][j].get_size(2, 2));
        dx3_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[2][k].get_size(0, 0));
        dy3_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[2][k].get_size(1, 1));
        dz3_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[2][k].get_size(2, 2));
        hit1[hit_counter] = i;
        hit2[hit_counter] = j;
        hit3[hit_counter] = k;
        if (hit_counter == 4) {
                                  z3_a, dx1_a, dy1_a, dz1_a, dx2_a, dy2_a, dz2_a,
                                  dx3_a, dy3_a, dz3_a, kappa_a, dkappa_a,
                                  ux_mid_a, uy_mid_a, ux_end_a, uy_end_a,
                                  dzdl_1_a, dzdl_2_a, ddzdl_1_a, ddzdl_2_a);
          for (unsigned int h = 0; h < hit_counter; ++h) {
                (dzdl_1_a[h] - dzdl_2_a[h]) /
                (ddzdl_1_a[h] + ddzdl_2_a[h] + fabs(dzdl_1_a[h] * sinang_cut));
            temp_segment.chi2 *= temp_segment.chi2;
            if (temp_segment.chi2 > 2.0) {
            temp_segment.ux = ux_end_a[h];
            temp_segment.uy = uy_end_a[h];
            temp_segment.kappa = kappa_a[h];
            temp_segment.dkappa = dkappa_a[h];
            temp_segment.hits[0] = hit1[h];
            temp_segment.hits[1] = hit2[h];
            temp_segment.hits[2] = hit3[h];
            unsigned int outer_layer =
            if ((outer_layer - 2) > allowed_missing) {
            if ((n_layers - 3) <= allowed_missing) {
              complete_segments.push_back(temp_segment);
            if (next_seg->size() == nextseg_size) {
              next_seg->push_back(temp_segment);
              (*next_seg)[nextseg_size] = temp_segment;
  if (hit_counter != 0) {
                              dx1_a, dy1_a, dz1_a, dx2_a, dy2_a, dz2_a, dx3_a,
                              dy3_a, dz3_a, kappa_a, dkappa_a, ux_mid_a, uy_mid_a,
                              ux_end_a, uy_end_a, dzdl_1_a, dzdl_2_a, ddzdl_1_a,
    for (unsigned int h = 0; h < hit_counter; ++h) {
          (dzdl_1_a[h] - dzdl_2_a[h]) /
          (ddzdl_1_a[h] + ddzdl_2_a[h] + fabs(dzdl_1_a[h] * sinang_cut));
      temp_segment.chi2 *= temp_segment.chi2;
      if (temp_segment.chi2 > 2.0) {
      temp_segment.ux = ux_end_a[h];
      temp_segment.uy = uy_end_a[h];
      temp_segment.kappa = kappa_a[h];
      temp_segment.dkappa = dkappa_a[h];
      temp_segment.hits[0] = hit1[h];
      temp_segment.hits[1] = hit2[h];
      temp_segment.hits[2] = hit3[h];
      unsigned int outer_layer =
          layer_sorted[2][temp_segment.hits[2]].get_layer();
      if ((outer_layer - 2) > allowed_missing) {
      if ((n_layers - 3) <= allowed_missing) {
        complete_segments.push_back(temp_segment);
      if (next_seg->size() == nextseg_size) {
        next_seg->push_back(temp_segment);
        (*next_seg)[nextseg_size] = temp_segment;
  swap(cur_seg, next_seg);
  swap(curseg_size, nextseg_size);
  unsigned int whichseg[4];
  for (unsigned int l = 3; l < n_layers; ++l) {
    if (l == (n_layers - 1)) {
      easy_chi2_cut *= 0.25;
    for (unsigned int i = 0, sizei = curseg_size; i < sizei; ++i) {
      for (unsigned int j = 0, sizej = layer_sorted[l].size(); j < sizej; ++j) {
        if ((layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_layer() >=
        x1_a[hit_counter] = layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_x();
        y1_a[hit_counter] = layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_y();
        z1_a[hit_counter] = layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_z();
        x2_a[hit_counter] = layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_x();
        y2_a[hit_counter] = layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_y();
        z2_a[hit_counter] = layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_z();
        dx1_a[hit_counter] = 0.5 * sqrt(12.0) *
            sqrt(layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_size(0, 0));
        dy1_a[hit_counter] = 0.5 * sqrt(12.0) *
            sqrt(layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_size(1, 1));
        dz1_a[hit_counter] = 0.5 * sqrt(12.0) *
            sqrt(layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_size(2, 2));
        dx2_a[hit_counter] = 0.5 * sqrt(12.0) *
            sqrt(layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_size(0, 0));
        dy2_a[hit_counter] = 0.5 * sqrt(12.0) *
            sqrt(layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_size(1, 1));
        dz2_a[hit_counter] = 0.5 * sqrt(12.0) *
            sqrt(layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_size(2, 2));
        dx3_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[l][j].get_size(0, 0));
        dy3_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[l][j].get_size(1, 1));
        dz3_a[hit_counter] = 0.5 * sqrt(12.0) * sqrt(layer_sorted[l][j].get_size(2, 2));
        cur_kappa_a[hit_counter] = (*cur_seg)[i].kappa;
        cur_dkappa_a[hit_counter] = (*cur_seg)[i].dkappa;
        cur_ux_a[hit_counter] = (*cur_seg)[i].ux;
        cur_uy_a[hit_counter] = (*cur_seg)[i].uy;
        cur_chi2_a[hit_counter] = (*cur_seg)[i].chi2;
        whichseg[hit_counter] = i;
        hit1[hit_counter] = j;
        if (hit_counter == 4) {
              x1_a, y1_a, z1_a, x2_a, y2_a, z2_a, x3_a, y3_a, z3_a, dx1_a,
              dy1_a, dz1_a, dx2_a, dy2_a, dz2_a, dx3_a, dy3_a, dz3_a, kappa_a,
              dkappa_a, ux_mid_a, uy_mid_a, ux_end_a, uy_end_a, dzdl_1_a,
              dzdl_2_a, ddzdl_1_a, ddzdl_2_a, sinang_cut, cosang_diff_inv,
              cur_kappa_a, cur_dkappa_a, cur_ux_a, cur_uy_a, cur_chi2_a,
          for (unsigned int h = 0; h < hit_counter; ++h) {
            if ((chi2_a[h]) * inv_layer[l] < easy_chi2_cut) {
              temp_segment.chi2 = chi2_a[h];
              temp_segment.ux = ux_end_a[h];
              temp_segment.uy = uy_end_a[h];
              temp_segment.kappa = kappa_a[h];
              temp_segment.dkappa = dkappa_a[h];
              for (unsigned int ll = 0; ll < l; ++ll) {
                temp_segment.hits[ll] = (*cur_seg)[whichseg[h]].hits[ll];
              temp_segment.hits[l] = hit1[h];
              unsigned int outer_layer =
              temp_segment.n_hits = l + 1;
              if ((n_layers - (l + 1)) <= allowed_missing) {
                complete_segments.push_back(temp_segment);
              if ((outer_layer - l) > allowed_missing) {
              if (next_seg->size() == nextseg_size) {
                next_seg->push_back(temp_segment);
                (*next_seg)[nextseg_size] = temp_segment;
    if (hit_counter != 0) {
          x1_a, y1_a, z1_a, x2_a, y2_a, z2_a, x3_a, y3_a, z3_a, dx1_a, dy1_a,
          dz1_a, dx2_a, dy2_a, dz2_a, dx3_a, dy3_a, dz3_a, kappa_a, dkappa_a,
          ux_mid_a, uy_mid_a, ux_end_a, uy_end_a, dzdl_1_a, dzdl_2_a, ddzdl_1_a,
          ddzdl_2_a, sinang_cut, cosang_diff_inv, cur_kappa_a, cur_dkappa_a,
          cur_ux_a, cur_uy_a, cur_chi2_a, chi2_a);
      for (unsigned int h = 0; h < hit_counter; ++h) {
        if ((chi2_a[h]) * inv_layer[l] < easy_chi2_cut) {
          temp_segment.chi2 = chi2_a[h];
          temp_segment.ux = ux_end_a[h];
          temp_segment.uy = uy_end_a[h];
          temp_segment.kappa = kappa_a[h];
          temp_segment.dkappa = dkappa_a[h];
          for (unsigned int ll = 0; ll < l; ++ll) {
            temp_segment.hits[ll] = (*cur_seg)[whichseg[h]].hits[ll];
          temp_segment.hits[l] = hit1[h];
          unsigned int outer_layer =
          temp_segment.n_hits = l + 1;
          if ((n_layers - (l + 1)) <= allowed_missing) {
            complete_segments.push_back(temp_segment);
          if ((outer_layer - l) > allowed_missing) {
          if (next_seg->size() == nextseg_size) {
            next_seg->push_back(temp_segment);
            (*next_seg)[nextseg_size] = temp_segment;
    swap(cur_seg, next_seg);
    swap(curseg_size, nextseg_size);
  for (unsigned int i = 0; i < complete_segments.size(); ++i) {
    if (cur_seg->size() == curseg_size) {
      cur_seg->push_back(complete_segments[i]);
      (*cur_seg)[curseg_size] = complete_segments[i];
  gettimeofday(&t2, NULL);
  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
  CAtime += (time2 - time1);

  vector<SimpleHit3D> temp_hits;
  for (unsigned int i = 0, sizei = curseg_size; i < sizei; ++i) {
    temp_comb.assign((*cur_seg)[i].n_hits, 0);
    for (unsigned int l = 0; l < (*cur_seg)[i].n_hits; ++l) {
    if (it != combos.end()) {
    if (combos.size() > 10000) {
    for (unsigned int l = 0; l < (*cur_seg)[i].n_hits; ++l) {
    gettimeofday(&t1, NULL);
    float init_chi2 = fitTrack(temp_track);
    gettimeofday(&t2, NULL);
    time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
    time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
    state.phi = temp_track.phi;
    if (state.phi < 0.) {
    state.d = temp_track.d;
    state.z0 = temp_track.z0;
    state.C = Matrix<float, 5, 5>::Zero(5, 5);
    state.C(0, 0) = pow(0.01, 2.);
    state.C(1, 1) = pow(0.01, 2.);
    state.C(2, 2) = pow(0.01 * state.nu, 2.);
    state.C(3, 3) = pow(0.05, 2.);
    state.C(4, 4) = pow(0.05, 2.);
    for (unsigned int h = 0; h < temp_track.hits.size(); ++h) {
    gettimeofday(&t2, NULL);
    time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
    time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
    if (!(temp_track.kappa == temp_track.kappa)) {
    if (state.chi2 / (2. * ((float)(temp_track.hits.size())) - 5.) > chi2_cut) {
    if (fabs(temp_track.d) > dca_cut) {
    tracks.push_back(temp_track);
    for (unsigned int i = 0; i < temp_track.hits.size(); ++i) {
      (*hit_used)[temp_track.hits[i].get_id()] = true;
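// calculateKappaTangents(): SSE evaluation, four hit triplets at a time, of
// the circle through three hits. The visible factors are the Heron-style
// product (D12+D23+D31)(D23+D31-D12)(D12+D31-D23)(...) and the chord-length
// product D12*D23*D31, consistent with curvature = 4*Area / (D12*D23*D31);
// degenerate triangles are masked to zero curvature. The unit tangents at the
// middle and outer hit come from rotating the 2-3 chord direction by the
// inscribed angle (cosalpha, sinalpha), and dzdl is built from the arc-length
// estimate and the z difference of each hit pair (the visible lines compute
// 1/sqrt(s^2 + dz^2)). _vec_sqrt_ps, _vec_rec_ps, _vec_rsqrt_ps and
// _vec_atan_ps are the project's SSE math helpers.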
    float* x1_a, float* y1_a, float* z1_a, float* x2_a, float* y2_a,
    float* z2_a, float* x3_a, float* y3_a, float* z3_a, float* dx1_a,
    float* dy1_a, float* dz1_a, float* dx2_a, float* dy2_a, float* dz2_a,
    float* dx3_a, float* dy3_a, float* dz3_a, float* kappa_a, float* dkappa_a,
    float* ux_mid_a, float* uy_mid_a, float* ux_end_a, float* uy_end_a,
    float* dzdl_1_a, float* dzdl_2_a, float* ddzdl_1_a, float* ddzdl_2_a) {
  static const __m128 two = {2., 2., 2., 2.};

  __m128 x1 = _mm_load_ps(x1_a);
  __m128 x2 = _mm_load_ps(x2_a);
  __m128 x3 = _mm_load_ps(x3_a);
  __m128 y1 = _mm_load_ps(y1_a);
  __m128 y2 = _mm_load_ps(y2_a);
  __m128 y3 = _mm_load_ps(y3_a);
  __m128 z1 = _mm_load_ps(z1_a);
  __m128 z2 = _mm_load_ps(z2_a);
  __m128 z3 = _mm_load_ps(z3_a);

  __m128 dx1 = _mm_load_ps(dx1_a);
  __m128 dx2 = _mm_load_ps(dx2_a);
  __m128 dx3 = _mm_load_ps(dx3_a);
  __m128 dy1 = _mm_load_ps(dy1_a);
  __m128 dy2 = _mm_load_ps(dy2_a);
  __m128 dy3 = _mm_load_ps(dy3_a);
  __m128 dz1 = _mm_load_ps(dz1_a);
  __m128 dz2 = _mm_load_ps(dz2_a);
  __m128 dz3 = _mm_load_ps(dz3_a);

  __m128 D12 = _mm_sub_ps(x2, x1);
  D12 = _mm_mul_ps(D12, D12);
  __m128 tmp1 = _mm_sub_ps(y2, y1);
  tmp1 = _mm_mul_ps(tmp1, tmp1);
  D12 = _mm_add_ps(D12, tmp1);
  D12 = _vec_sqrt_ps(D12);

  __m128 D23 = _mm_sub_ps(x3, x2);
  D23 = _mm_mul_ps(D23, D23);
  tmp1 = _mm_sub_ps(y3, y2);
  tmp1 = _mm_mul_ps(tmp1, tmp1);
  D23 = _mm_add_ps(D23, tmp1);
  D23 = _vec_sqrt_ps(D23);

  __m128 D31 = _mm_sub_ps(x1, x3);
  D31 = _mm_mul_ps(D31, D31);
  tmp1 = _mm_sub_ps(y1, y3);
  tmp1 = _mm_mul_ps(tmp1, tmp1);
  D31 = _mm_add_ps(D31, tmp1);
  D31 = _vec_sqrt_ps(D31);

  __m128 k = _mm_mul_ps(D12, D23);
  k = _mm_mul_ps(k, D31);
  tmp1 = (D12 + D23 + D31) * (D23 + D31 - D12) * (D12 + D31 - D23) *
  tmp1 = _vec_sqrt_ps(tmp1);

  __m128 tmp2 = _mm_cmpgt_ps(tmp1, zero);
  tmp1 = _mm_and_ps(tmp2, k);
  tmp2 = _mm_andnot_ps(tmp2, zero);
  k = _mm_xor_ps(tmp1, tmp2);
  _mm_store_ps(kappa_a, k);
  __m128 k_inv = _vec_rec_ps(k);

  __m128 D12_inv = _vec_rec_ps(D12);
  __m128 D23_inv = _vec_rec_ps(D23);
  __m128 D31_inv = _vec_rec_ps(D31);

  __m128 dr1 = dx1 * dx1 + dy1 * dy1;
  dr1 = _vec_sqrt_ps(dr1);
  __m128 dr2 = dx2 * dx2 + dy2 * dy2;
  dr2 = _vec_sqrt_ps(dr2);
  __m128 dr3 = dx3 * dx3 + dy3 * dy3;
  dr3 = _vec_sqrt_ps(dr3);

  __m128 dk1 = (dr1 + dr2) * D12_inv * D12_inv;
  __m128 dk2 = (dr2 + dr3) * D23_inv * D23_inv;
  __m128 dk = dk1 + dk2;
  _mm_store_ps(dkappa_a, dk);

  __m128 ux12 = (x2 - x1) * D12_inv;
  __m128 uy12 = (y2 - y1) * D12_inv;
  __m128 ux23 = (x3 - x2) * D23_inv;
  __m128 uy23 = (y3 - y2) * D23_inv;
  __m128 ux13 = (x3 - x1) * D31_inv;
  __m128 uy13 = (y3 - y1) * D31_inv;

  __m128 cosalpha = ux12 * ux13 + uy12 * uy13;
  __m128 sinalpha = ux13 * uy12 - ux12 * uy13;

  __m128 ux_mid = ux23 * cosalpha - uy23 * sinalpha;
  __m128 uy_mid = ux23 * sinalpha + uy23 * cosalpha;
  _mm_store_ps(ux_mid_a, ux_mid);
  _mm_store_ps(uy_mid_a, uy_mid);

  __m128 ux_end = ux23 * cosalpha + uy23 * sinalpha;
  __m128 uy_end = uy23 * cosalpha - ux23 * sinalpha;
  _mm_store_ps(ux_end_a, ux_end);
  _mm_store_ps(uy_end_a, uy_end);

  __m128 v = one - sinalpha * sinalpha;
  v = _vec_sqrt_ps(v);
  __m128 s2 = _vec_atan_ps(v);
  tmp1 = _mm_cmpgt_ps(k, zero);
  tmp2 = _mm_and_ps(tmp1, s2);
  tmp1 = _mm_andnot_ps(tmp1, D23);
  s2 = _mm_xor_ps(tmp1, tmp2);

  __m128 del_z_2 = z3 - z2;
  __m128 dzdl_2 = s2 * s2 + del_z_2 * del_z_2;
  dzdl_2 = _vec_rsqrt_ps(dzdl_2);
  __m128 ddzdl_2 = (dz2 + dz3) * D23_inv;
  _mm_store_ps(dzdl_2_a, dzdl_2);
  _mm_store_ps(ddzdl_2_a, ddzdl_2);

  sinalpha = ux13 * uy23 - ux23 * uy13;
  v = one - sinalpha * sinalpha;
  v = _vec_sqrt_ps(v);
  __m128 s1 = _vec_atan_ps(v);
  tmp1 = _mm_cmpgt_ps(k, zero);
  tmp2 = _mm_and_ps(tmp1, s1);
  tmp1 = _mm_andnot_ps(tmp1, D12);
  s1 = _mm_xor_ps(tmp1, tmp2);

  __m128 del_z_1 = z2 - z1;
  __m128 dzdl_1 = s1 * s1 + del_z_1 * del_z_1;
  dzdl_1 = _vec_rsqrt_ps(dzdl_1);
  __m128 ddzdl_1 = (dz1 + dz2) * D12_inv;
  _mm_store_ps(dzdl_1_a, dzdl_1);
  _mm_store_ps(ddzdl_1_a, ddzdl_1);
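// Second calculateKappaTangents() overload, used while extending segments: the
// kinematics above are repeated and then a running chi2 is formed from (a) the
// curvature change relative to the segment's current kappa, normalized by the
// combined dkappa plus a sinang_cut term, (b) a scattering-angle term built
// from 1 - cos of the angle between the previous and new tangent directions,
// scaled by cosang_diff_inv, and (c) a dzdl-mismatch term; the sum is added to
// the segment's accumulated chi2 and written to chi2_a.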
    float* x1_a, float* y1_a, float* z1_a, float* x2_a, float* y2_a,
    float* z2_a, float* x3_a, float* y3_a, float* z3_a, float* dx1_a,
    float* dy1_a, float* dz1_a, float* dx2_a, float* dy2_a, float* dz2_a,
    float* dx3_a, float* dy3_a, float* dz3_a, float* kappa_a, float* dkappa_a,
    float* ux_mid_a, float* uy_mid_a, float* ux_end_a, float* uy_end_a,
    float* dzdl_1_a, float* dzdl_2_a, float* ddzdl_1_a, float* ddzdl_2_a,
    float sinang_cut, float cosang_diff_inv, float* cur_kappa_a,
    float* cur_dkappa_a, float* cur_ux_a, float* cur_uy_a, float* cur_chi2_a,
  static const __m128 two = {2., 2., 2., 2.};

  __m128 x1 = _mm_load_ps(x1_a);
  __m128 x2 = _mm_load_ps(x2_a);
  __m128 x3 = _mm_load_ps(x3_a);
  __m128 y1 = _mm_load_ps(y1_a);
  __m128 y2 = _mm_load_ps(y2_a);
  __m128 y3 = _mm_load_ps(y3_a);
  __m128 z1 = _mm_load_ps(z1_a);
  __m128 z2 = _mm_load_ps(z2_a);
  __m128 z3 = _mm_load_ps(z3_a);

  __m128 dx1 = _mm_load_ps(dx1_a);
  __m128 dx2 = _mm_load_ps(dx2_a);
  __m128 dx3 = _mm_load_ps(dx3_a);
  __m128 dy1 = _mm_load_ps(dy1_a);
  __m128 dy2 = _mm_load_ps(dy2_a);
  __m128 dy3 = _mm_load_ps(dy3_a);
  __m128 dz1 = _mm_load_ps(dz1_a);
  __m128 dz2 = _mm_load_ps(dz2_a);
  __m128 dz3 = _mm_load_ps(dz3_a);

  __m128 D12 = _mm_sub_ps(x2, x1);
  D12 = _mm_mul_ps(D12, D12);
  __m128 tmp1 = _mm_sub_ps(y2, y1);
  tmp1 = _mm_mul_ps(tmp1, tmp1);
  D12 = _mm_add_ps(D12, tmp1);
  D12 = _vec_sqrt_ps(D12);

  __m128 D23 = _mm_sub_ps(x3, x2);
  D23 = _mm_mul_ps(D23, D23);
  tmp1 = _mm_sub_ps(y3, y2);
  tmp1 = _mm_mul_ps(tmp1, tmp1);
  D23 = _mm_add_ps(D23, tmp1);
  D23 = _vec_sqrt_ps(D23);

  __m128 D31 = _mm_sub_ps(x1, x3);
  D31 = _mm_mul_ps(D31, D31);
  tmp1 = _mm_sub_ps(y1, y3);
  tmp1 = _mm_mul_ps(tmp1, tmp1);
  D31 = _mm_add_ps(D31, tmp1);
  D31 = _vec_sqrt_ps(D31);

  __m128 k = _mm_mul_ps(D12, D23);
  k = _mm_mul_ps(k, D31);
  tmp1 = (D12 + D23 + D31) * (D23 + D31 - D12) * (D12 + D31 - D23) *
  tmp1 = _vec_sqrt_ps(tmp1);

  __m128 tmp2 = _mm_cmpgt_ps(tmp1, zero);
  tmp1 = _mm_and_ps(tmp2, k);
  tmp2 = _mm_andnot_ps(tmp2, zero);
  k = _mm_xor_ps(tmp1, tmp2);
  _mm_store_ps(kappa_a, k);
  __m128 k_inv = _vec_rec_ps(k);

  __m128 D12_inv = _vec_rec_ps(D12);
  __m128 D23_inv = _vec_rec_ps(D23);
  __m128 D31_inv = _vec_rec_ps(D31);

  __m128 dr1 = dx1 * dx1 + dy1 * dy1;
  dr1 = _vec_sqrt_ps(dr1);
  __m128 dr2 = dx2 * dx2 + dy2 * dy2;
  dr2 = _vec_sqrt_ps(dr2);
  __m128 dr3 = dx3 * dx3 + dy3 * dy3;
  dr3 = _vec_sqrt_ps(dr3);

  __m128 dk1 = (dr1 + dr2) * D12_inv * D12_inv;
  __m128 dk2 = (dr2 + dr3) * D23_inv * D23_inv;
  __m128 dk = dk1 + dk2;
  _mm_store_ps(dkappa_a, dk);

  __m128 ux12 = (x2 - x1) * D12_inv;
  __m128 uy12 = (y2 - y1) * D12_inv;
  __m128 ux23 = (x3 - x2) * D23_inv;
  __m128 uy23 = (y3 - y2) * D23_inv;
  __m128 ux13 = (x3 - x1) * D31_inv;
  __m128 uy13 = (y3 - y1) * D31_inv;

  __m128 cosalpha = ux12 * ux13 + uy12 * uy13;
  __m128 sinalpha = ux13 * uy12 - ux12 * uy13;

  __m128 ux_mid = ux23 * cosalpha - uy23 * sinalpha;
  __m128 uy_mid = ux23 * sinalpha + uy23 * cosalpha;
  _mm_store_ps(ux_mid_a, ux_mid);
  _mm_store_ps(uy_mid_a, uy_mid);

  __m128 ux_end = ux23 * cosalpha + uy23 * sinalpha;
  __m128 uy_end = uy23 * cosalpha - ux23 * sinalpha;
  _mm_store_ps(ux_end_a, ux_end);
  _mm_store_ps(uy_end_a, uy_end);

  __m128 v = one - sinalpha * sinalpha;
  v = _vec_sqrt_ps(v);
  __m128 s2 = _vec_atan_ps(v);
  tmp1 = _mm_cmpgt_ps(k, zero);
  tmp2 = _mm_and_ps(tmp1, s2);
  tmp1 = _mm_andnot_ps(tmp1, D23);
  s2 = _mm_xor_ps(tmp1, tmp2);

  __m128 del_z_2 = z3 - z2;
  __m128 dzdl_2 = s2 * s2 + del_z_2 * del_z_2;
  dzdl_2 = _vec_rsqrt_ps(dzdl_2);
  __m128 ddzdl_2 = (dz2 + dz3) * D23_inv;
  _mm_store_ps(dzdl_2_a, dzdl_2);
  _mm_store_ps(ddzdl_2_a, ddzdl_2);

  sinalpha = ux13 * uy23 - ux23 * uy13;
  v = one - sinalpha * sinalpha;
  v = _vec_sqrt_ps(v);
  __m128 s1 = _vec_atan_ps(v);
  tmp1 = _mm_cmpgt_ps(k, zero);
  tmp2 = _mm_and_ps(tmp1, s1);
  tmp1 = _mm_andnot_ps(tmp1, D12);
  s1 = _mm_xor_ps(tmp1, tmp2);

  __m128 del_z_1 = z2 - z1;
  __m128 dzdl_1 = s1 * s1 + del_z_1 * del_z_1;
  dzdl_1 = _vec_rsqrt_ps(dzdl_1);
  __m128 ddzdl_1 = (dz1 + dz2) * D12_inv;
  _mm_store_ps(dzdl_1_a, dzdl_1);
  _mm_store_ps(ddzdl_1_a, ddzdl_1);

  __m128 c_dk = _mm_load_ps(cur_dkappa_a);
  __m128 c_k = _mm_load_ps(cur_kappa_a);
  __m128 c_ux = _mm_load_ps(cur_ux_a);
  __m128 c_uy = _mm_load_ps(cur_uy_a);
  __m128 c_chi2 = _mm_load_ps(cur_chi2_a);
  __m128 sinang = _mm_load1_ps(&sinang_cut);
  __m128 cosdiff = _mm_load1_ps(&cosang_diff_inv);

  __m128 kdiff = c_k - k;
  __m128 n_dk = c_dk + dk + sinang * k;
  __m128 chi2_k = kdiff * kdiff / (n_dk * n_dk);
  __m128 cos_scatter = c_ux * ux_mid + c_uy * uy_mid;
      (one - cos_scatter) * (one - cos_scatter) * cosdiff * cosdiff;
  tmp1 = dzdl_1 * sinang;
  __m128 chi2_dzdl = (dzdl_1 - dzdl_2) / (ddzdl_1 + ddzdl_2 + tmp1);
  chi2_dzdl *= chi2_dzdl;
  __m128 n_chi2 = c_chi2 + chi2_ang + chi2_k + chi2_dzdl;
  _mm_store_ps(chi2_a, n_chi2);