ECCE @ EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HelixHough_z0Range_sse.cpp
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file HelixHough_z0Range_sse.cpp
1 #include "vector_math_inline.h"
2 #include "HelixHough.h"
3 #include <math.h>
4 #include <iostream>
5 
6 using namespace std;
7 
8 
9 static const __m128 one_o_10 = {0.1,0.1,0.1,0.1};
10 static const __m128 one_o_2 = {0.5,0.5,0.5,0.5};
11 static const __m128 close_one = {0.999,0.999,0.999,0.999};
12 static const __m128 two = {2., 2., 2., 2.};
13 static const __m128 four = {4., 4., 4., 4.};
14 static const __m128 one_o_3 = {0.3333333333333333333, 0.3333333333333333333, 0.3333333333333333333, 0.3333333333333333333};
15 static const __m128 _3_o_20 = {0.15, 0.15, 0.15, 0.15};
16 static const __m128 _5_o_56 = {8.92857142857142877e-02, 8.92857142857142877e-02, 8.92857142857142877e-02, 8.92857142857142877e-02};
17 static const __m128 SIGNMASK = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
18 
19 void HelixHough::z0Range_sse(const SimpleHit3D& hit, float cosphi1, float sinphi1, float cosphi2, float sinphi2, float min_k, float max_k, float min_phi, float max_phi, float min_d, float max_d, float min_theta, float max_theta, float& min_z0, float& max_z0)
20 {
21  float z0_1[4] __attribute__((aligned(16))) = {0.,0.,0.,0.};
22 
23  __m128 v_z0_1 = _mm_load_ps(z0_1);
24 
25  float x_arr[4] __attribute__((aligned(16))) = {hit.x,hit.x,hit.x,hit.x};
26  float y_arr[4] __attribute__((aligned(16))) = {hit.y,hit.y,hit.y,hit.y};
27  float z_arr[4] __attribute__((aligned(16))) = {hit.z,hit.z,hit.z,hit.z};
28 
29  __m128 v_x = _mm_load_ps(x_arr);
30  __m128 v_y = _mm_load_ps(y_arr);
31  __m128 v_z = _mm_load_ps(z_arr);
32 
33 
34  // p0,p0,p1,p1 d0,d0,d1,d1 k0,k0,k1,k1 t0,t1,t0,t1
35 
36  float cosphi_1[4] __attribute__((aligned(16))) = {cosphi1, cosphi1, cosphi2, cosphi2}; __m128 v_cosphi_1 = _mm_load_ps(cosphi_1);
37  float sinphi_1[4] __attribute__((aligned(16))) = {sinphi1, sinphi1, sinphi2, sinphi2}; __m128 v_sinphi_1 = _mm_load_ps(sinphi_1);
38  float d_1[4] __attribute__((aligned(16))) = {min_d, min_d, max_d, max_d}; __m128 v_d_1 = _mm_load_ps(d_1);
39  float k_1[4] __attribute__((aligned(16))) = {min_k, min_k, max_k, max_k}; __m128 v_k_1 = _mm_load_ps(k_1);
40  float t_1[4] __attribute__((aligned(16))) = {min_theta, max_theta, min_theta, max_theta}; __m128 v_t_1 = _mm_load_ps(t_1);
41 
42 
43  //first 4
44  __m128 v_dx = _mm_mul_ps(v_cosphi_1, v_d_1);
45  __m128 v_dy = _mm_mul_ps(v_sinphi_1, v_d_1);
46  __m128 tmp1 = _mm_sub_ps(v_x, v_dx);
47  tmp1 = _mm_mul_ps(tmp1, tmp1);
48  __m128 v_D = _mm_sub_ps(v_y, v_dy);
49  v_D = _mm_mul_ps(v_D, v_D);
50  v_D = _mm_add_ps(v_D, tmp1);
51  v_D = _mm_sqrt_ps(v_D);
52  __m128 v_v = _mm_mul_ps(one_o_2, v_k_1);
53  v_v = _mm_mul_ps(v_v, v_D);
54  //if(v > 0.999){v = 0.999;}
55  tmp1 = _mm_cmpgt_ps(v_v, close_one);
56  __m128 tmp2 = _mm_and_ps(tmp1, close_one);
57  __m128 tmp3 = _mm_andnot_ps(tmp1, v_v);
58  v_v = _mm_xor_ps(tmp2, tmp3);
59  //power series assuming v_v is small
60  __m128 v_s = zero;
61  __m128 temp1 = _mm_mul_ps(v_v, v_v);
62  __m128 temp2 = _mm_mul_ps(one_o_2, v_D);
63  tmp1 = _mm_mul_ps(two, temp2);
64  v_s = _mm_add_ps(v_s, tmp1);
65  temp2 = _mm_mul_ps(temp2, temp1);
66  tmp1 = _mm_mul_ps(temp2, one_o_3);
67  v_s = _mm_add_ps(v_s, tmp1);
68  temp2 = _mm_mul_ps(temp2, temp1);
69  tmp1 = _mm_mul_ps(temp2, _3_o_20);
70  v_s = _mm_add_ps(v_s, tmp1);
71  temp2 = _mm_mul_ps(temp2, temp1);
72  tmp1 = _mm_mul_ps(temp2, _5_o_56);
73  v_s = _mm_add_ps(v_s, tmp1);
75  //otherwise we calculate an arcsin
76  //asin(x) = 2*atan( x/( 1 + sqrt( 1 - x*x ) ) )
77  //s = 2*asin(v)/k
78  tmp1 = _mm_mul_ps(v_v, v_v);
79  tmp1 = _mm_sub_ps(one, tmp1);
80  tmp1 = _mm_sqrt_ps(tmp1);
81  tmp1 = _mm_add_ps(one, tmp1);
82  tmp1 = _mm_div_ps(v_v, tmp1);
83  tmp2 = _vec_atan_ps(tmp1);
84  tmp2 = _mm_mul_ps(four, tmp2);
85  tmp2 = _mm_div_ps(tmp2, v_k_1);
87  //choose between the two methods to calculate s
88  tmp1 = _mm_cmpgt_ps(v_v, one_o_10);
89  tmp3 = _mm_and_ps(tmp1, tmp2);
90  tmp2 = _mm_andnot_ps(tmp1, v_s);
91  v_s = _mm_xor_ps(tmp3, tmp2);
93  tmp1 = _mm_mul_ps(v_s, v_s);
94  tmp2 = _mm_mul_ps(v_t_1, v_t_1);
95  tmp3 = _mm_mul_ps(tmp1, tmp2);
96  __m128 tmp4 = _mm_sub_ps(one, tmp2);
97  tmp4 = _mm_div_ps(tmp3, tmp4);
98  tmp4 = _mm_sqrt_ps(tmp4);
99  //if(t > 0){dz = -tmp4;}
100  //else{dz = tmp4;}
101  tmp3 = _mm_xor_ps(tmp4, SIGNMASK);
102  tmp1 = _mm_cmpgt_ps(v_t_1, zero);
103  tmp2 = _mm_and_ps(tmp1, tmp3);
104  __m128 v_dz = _mm_andnot_ps(tmp1, tmp4);
105  v_dz = _mm_xor_ps(tmp2, v_dz);
106  v_z0_1 = _mm_add_ps(v_dz, v_z);
107 
108 
109  float z0_a[4] __attribute__((aligned(16))) = {0.,0.,0.,0.};
110  _mm_store_ps(z0_a, v_z0_1);
111 
112  min_z0 = z0_a[0];
113  if (z0_a[1] < min_z0) min_z0 = z0_a[1] ;
114  if (z0_a[2] < min_z0) min_z0 = z0_a[2] ;
115  if (z0_a[3] < min_z0) min_z0 = z0_a[3] ;
116 
117  max_z0 = z0_a[0];
118  if (z0_a[1] > max_z0) max_z0 = z0_a[1] ;
119  if (z0_a[2] > max_z0) max_z0 = z0_a[2] ;
120  if (z0_a[3] > max_z0) max_z0 = z0_a[3] ;
121 
122 
123  min_z0 -= hit.dz;
124  max_z0 += hit.dz;
125 }