ECCE @ EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
dfe_io_numpy.hpp
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file dfe_io_numpy.hpp
1 // SPDX-License-Identifier: MIT
2 // Copyright 2015,2018-2019 Moritz Kiehn
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 // SOFTWARE.
21 
26 
27 #pragma once
28 
29 #include <array>
30 #include <cstdint>
31 #include <fstream>
32 #include <string>
33 #include <tuple>
34 #include <utility>
35 
36 namespace dfe {
37 
/// Write records of a named-tuple type to a binary NumPy `.npy` file.
///
/// Records are appended one at a time. The file header stores the number of
/// records, which is only known at the end; it is therefore rewritten in
/// place when the writer is destroyed.
///
/// \tparam NamedTuple record type; must provide a nested `Tuple` type and be
///                    usable with the helpers in `io_npy_impl`
template<typename NamedTuple>
class NamedTupleNumpyWriter {
public:
  NamedTupleNumpyWriter() = delete;
  // The writer owns an output file stream and can not be copied.
  NamedTupleNumpyWriter(const NamedTupleNumpyWriter&) = delete;
  NamedTupleNumpyWriter(NamedTupleNumpyWriter&&) = default;
  /// Write the final header with the true record count and close the file.
  ~NamedTupleNumpyWriter();
  NamedTupleNumpyWriter& operator=(const NamedTupleNumpyWriter&) = delete;
  NamedTupleNumpyWriter& operator=(NamedTupleNumpyWriter&&) = default;

  /// Create a npy file at the given path. Overwrites existing data.
  NamedTupleNumpyWriter(const std::string& path);

  /// Append a record to the end of the file.
  void append(const NamedTuple& record);

private:
  // the equivalent std::tuple-like type
  using Tuple = typename NamedTuple::Tuple;

  // output stream; configured to throw on errors by the constructor
  std::ofstream m_file;
  // size of the initially written header w/o the trailing newline; zero
  // until the first header has been written
  std::size_t m_fixed_header_length;
  // number of records appended so far
  std::size_t m_num_tuples;

  // (re-)write the header at the beginning of the file
  void write_header(std::size_t num_tuples);
  // write all elements of a record in index order
  template<std::size_t... I>
  void write_record(const NamedTuple& record, std::index_sequence<I...>);
  // write the raw bytes of a single value
  template<typename T>
  void write_bytes(const T* ptr);
};
73 
74 // implementation helpers
75 namespace io_npy_impl {
76 
// NumPy dtype code (without byte-order prefix) for a scalar C++ type.
//
// The primary template is declared with a type that can never be formed, so
// only the explicit specializations listed below are usable; requesting the
// code for any other type is a compile-time error.
template<typename T>
constexpr std::enable_if_t<false, T> kNumpyDtypeCode;

// boolean
template<>
constexpr char const* kNumpyDtypeCode<bool> = "?";

// floating point
template<>
constexpr char const* kNumpyDtypeCode<float> = "f4";
template<>
constexpr char const* kNumpyDtypeCode<double> = "f8";

// signed integers
template<>
constexpr char const* kNumpyDtypeCode<std::int8_t> = "i1";
template<>
constexpr char const* kNumpyDtypeCode<std::int16_t> = "i2";
template<>
constexpr char const* kNumpyDtypeCode<std::int32_t> = "i4";
template<>
constexpr char const* kNumpyDtypeCode<std::int64_t> = "i8";

// unsigned integers
template<>
constexpr char const* kNumpyDtypeCode<std::uint8_t> = "u1";
template<>
constexpr char const* kNumpyDtypeCode<std::uint16_t> = "u2";
template<>
constexpr char const* kNumpyDtypeCode<std::uint32_t> = "u4";
template<>
constexpr char const* kNumpyDtypeCode<std::uint64_t> = "u8";
101 
102 template<typename... Types>
103 constexpr std::array<const char*, sizeof...(Types)>
104 dtypes_codes(const std::tuple<Types...>&) {
105  return {kNumpyDtypeCode<typename std::decay<Types>::type>...};
106 }
107 
// Determine the host byte order and return the matching dtype code modifier.
//
// Returns '<' on a little-endian host and '>' otherwise.
//
// The byte layout of a known 32bit value is inspected through an
// `unsigned char` pointer. Accessing the object representation this way is
// always allowed, whereas reading the inactive member of a union is
// undefined behaviour in C++.
//
// Derived from:
// https://stackoverflow.com/questions/1001307/detecting-endianness-programmatically-in-a-c-program
inline char
dtype_endianness_modifier() {
  const std::uint32_t probe = 0x0A0B0C0D;
  const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&probe);
  const bool is_little_endian = (bytes[0] == 0x0D) and (bytes[1] == 0x0C) and
                                (bytes[2] == 0x0B) and (bytes[3] == 0x0A);
  // TODO this assumes that only little and big endian exists and checks only
  //      for little. maybe verify that it always is one or the other?
  return is_little_endian ? '<' : '>';
}
124 
125 template<typename NamedTuple>
126 inline std::string
127 dtypes_description(const NamedTuple& nt) {
128  std::string descr;
130  auto names = nt.names();
131  auto codes = dtypes_codes(nt.tuple());
132  auto endianness_modifier = dtype_endianness_modifier();
133  descr += '[';
134  for (decltype(n) i = 0; i < n; ++i) {
135  descr += "('";
136  descr += names[i];
137  descr += "', '";
138  descr += endianness_modifier;
139  descr += codes[i];
140  descr += "')";
141  if ((i + 1) < n) {
142  descr += ", ";
143  }
144  }
145  descr += ']';
146  return descr;
147 }
148 
149 } // namespace io_npy_impl
150 
151 // implementation
152 
153 template<typename NamedTuple>
155  const std::string& path)
156  : m_fixed_header_length(0), m_num_tuples(0) {
157  // make our life easier. always throw on error
158  m_file.exceptions(std::ofstream::badbit | std::ofstream::failbit);
159  m_file.open(
160  path, std::ios_base::binary | std::ios_base::out | std::ios_base::trunc);
161  // write a header that uses the maximum amount of space, i.e. biggest
162  // possible number of ntuples, so that we have enough space when we
163  // overwrite it w/ the actual number of tuples at closing time.
164  write_header(SIZE_MAX);
165  write_header(0);
166 }
167 
168 template<typename NamedTuple>
170  if (!m_file.is_open()) {
171  return;
172  }
173  write_header(m_num_tuples);
174  m_file.close();
175 }
176 
177 template<typename NamedTuple>
178 inline void
179 NamedTupleNumpyWriter<NamedTuple>::append(const NamedTuple& record) {
180  write_record(
181  record, std::make_index_sequence<std::tuple_size<Tuple>::value>{});
182  m_num_tuples += 1;
183 }
184 
185 template<typename NamedTuple>
186 inline void
188  std::string header;
189  // magic
190  header += "\x93NUMPY";
191  // fixed version number (major, minor), 1byte unsigned each
192  header += static_cast<char>(0x1);
193  header += static_cast<char>(0x0);
194  // placeholder value for the header length, 2byte little endian unsigned
195  header += static_cast<char>(0xAF);
196  header += static_cast<char>(0xFE);
197  // python dict w/ data type and size information
198  header += "{'descr': ";
199  header += io_npy_impl::dtypes_description(NamedTuple());
200  header += ", 'fortran_order': False";
201  header += ", 'shape': (";
202  header += std::to_string(num_tuples);
203  header += ",)}";
204  // padd w/ spaces for 16 byte alignment of the whole header
205  while (((header.size() + 1) % 16) != 0) {
206  header += ' ';
207  }
208  // the initial header fixes the available header size. updated headers
209  // must always occupy the same space and might require additional
210  // padding spaces
211  if (m_fixed_header_length == 0) {
212  m_fixed_header_length = header.size();
213  } else {
214  while (header.size() < m_fixed_header_length) {
215  header += ' ';
216  }
217  }
218  header += '\n';
219  // replace the header length place holder
220  std::size_t header_length = header.size() - 10;
221  header[8] = static_cast<char>(header_length >> 0);
222  header[9] = static_cast<char>(header_length >> 8);
223  m_file.seekp(0);
224  m_file.write(header.data(), header.size());
225 }
226 
227 template<typename NamedTuple>
228 template<std::size_t... I>
229 inline void
231  const NamedTuple& record, std::index_sequence<I...>) {
232  // see namedtuple_impl::print_tuple for explanation
233  using std::get;
234  using Vacuum = int[];
235  (void)Vacuum{(write_bytes(&get<I>(record)), 0)...};
236 }
237 
238 template<typename NamedTuple>
239 template<typename T>
240 inline void
242  m_file.write(reinterpret_cast<const char*>(ptr), sizeof(T));
243 }
244 
245 } // namespace dfe