Akumuli
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
saxencoder.h
1 
17 #pragma once
18 
19 #include <boost/circular_buffer.hpp>
20 #include <boost/range.hpp>
21 
22 
23 namespace Akumuli {
24 namespace SAX {
25 
// Declared here, defined outside this header. SAXWord converts each symbol to
// int, passes it to this function and subtracts the result from the bit width
// of int to obtain the number of significant bits; a result equal to the full
// bit width is treated as "zero symbol".
// NOTE(review): presumably returns the number of leading zero bits of `value`
// (and the full bit width of int for 0) - confirm against the definition.
26 int leading_zeroes(int value);
27 
28 struct SAXWord {
29 
30  // Compression schema
31  // 0 - no data stored (zero symbol)
32  // 10 - 2 bits
33  // 110 - 6 bits
34  // 1110 - E bits
35  // 11110 - 1E bits
36  // 111110 - error
37 
38  enum { SIZE = 16 };
39 
40  char buffer[SIZE];
41 
45  : buffer{ 0 } {}
46 
48  SAXWord(const char* str)
49  : SAXWord(str, str + strlen(str)) {}
50 
52  SAXWord(const SAXWord& other) { memcpy(buffer, other.buffer, SIZE); }
53 
54  SAXWord& operator=(const SAXWord& other) {
55  if (&other != this) {
56  memcpy(buffer, other.buffer, SIZE);
57  }
58  return *this;
59  }
60 
61  bool operator!=(const SAXWord& other) const {
62  return !std::equal(buffer, buffer + SIZE, other.buffer);
63  }
64 
65  bool operator==(const SAXWord& other) const {
66  return std::equal(buffer, buffer + SIZE, other.buffer);
67  }
68 
70  template <class FwdIt>
71  SAXWord(FwdIt begin, FwdIt end)
72  : SAXWord() {
73  int ix = 0;
74  int shift = 0;
75  for (auto payload : boost::make_iterator_range(begin, end)) {
76  int zerobits = leading_zeroes((int)payload);
77  int signbits = 8 * sizeof(int) - zerobits;
78  // Store mask
79  if (signbits == 0) {
80  // just update indexes
81  shift++;
82  } else {
83  int mask = 0;
84  int nmask = 0; // number of bits in mask
85  if (signbits < 3) {
86  mask = 2;
87  nmask = 2;
88  signbits = 2;
89  } else if (signbits < 7) {
90  mask = 6;
91  nmask = 3;
92  signbits = 6;
93  } else if (signbits < 0xF) {
94  mask = 0xE;
95  nmask = 4;
96  signbits = 0xE;
97  } else if (signbits < 0x1E) {
98  mask = 0x1E;
99  nmask = 5;
100  signbits = 0x1E;
101  }
102  for (int i = nmask; i-- > 0;) {
103  if (shift == 8) {
104  ix++;
105  shift = 0;
106  if (ix == SIZE) {
107  std::runtime_error error("SAX word too long");
108  BOOST_THROW_EXCEPTION(error);
109  }
110  }
111  buffer[ix] |= ((1 & (mask >> i)) << shift);
112  shift++;
113  }
114  }
115  // Store payload
116  for (int i = signbits; i-- > 0;) {
117  if (shift == 8) {
118  ix++;
119  shift = 0;
120  if (ix == SIZE) {
121  std::runtime_error error("SAX word too long");
122  BOOST_THROW_EXCEPTION(error);
123  }
124  }
125  buffer[ix] |= ((1 & (payload >> i)) << shift);
126  shift++;
127  }
128  }
129  }
130 
131  template <class It> void read_n(int N, It it) const {
132  int ix = 0;
133  int shift = 0;
134  int mask = 0;
135  int nbits = 0;
136  bool read_payload = false;
137  for (int i = 0; i < N;) {
138  mask <<= 1;
139  mask |= (buffer[ix] >> shift) & 0x1;
140  shift++;
141  if (shift == 8) {
142  ix++;
143  shift = 0;
144  if (ix == SIZE) {
145  std::runtime_error error("sax word decoding out of bounds");
146  BOOST_THROW_EXCEPTION(error);
147  }
148  }
149  switch (mask) {
150  case 0:
151  read_payload = true;
152  nbits = 0;
153  break;
154  case 2:
155  read_payload = true;
156  nbits = 2;
157  break;
158  case 6:
159  read_payload = true;
160  nbits = 6;
161  break;
162  case 0xE:
163  read_payload = true;
164  nbits = 0xE;
165  break;
166  case 0x1E:
167  read_payload = true;
168  nbits = 0x1E;
169  break;
170  default:
171  if (mask > 0x1E) {
172  std::runtime_error error("invalid SAX word encoding");
173  BOOST_THROW_EXCEPTION(error);
174  }
175  break;
176  }
177  if (read_payload) {
178  int payload = 0;
179  for (int j = 0; j < nbits; j++) {
180  payload <<= 1;
181  payload |= (buffer[ix] >> shift) & 0x1;
182  shift++;
183  if (shift == 8) {
184  ix++;
185  shift = 0;
186  if (ix == SIZE) {
187  std::runtime_error error("sax word decoding out of bounds");
188  BOOST_THROW_EXCEPTION(error);
189  }
190  }
191  }
192  *it++ = payload;
193  read_payload = false;
194  mask = 0;
195  nbits = 0;
196  i++;
197  }
198  }
199  }
200 };
201 
202 
204 struct SAXEncoder {
205  int alphabet_;
207 
208  boost::circular_buffer<double> input_samples_;
209  std::string buffer_;
210  std::string last_;
211 
212  SAXEncoder();
213 
217  SAXEncoder(int alphabet, int window_width);
218 
224  bool encode(double sample, char* outword, size_t outword_size);
225 };
226 }
227 }
boost::circular_buffer< double > input_samples_
circular buffer of input samples
Definition: saxencoder.h:208
SAXWord(const SAXWord &other)
Copy c-tor.
Definition: saxencoder.h:52
SAXWord(FwdIt begin, FwdIt end)
Copy data from sequence.
Definition: saxencoder.h:71
int window_width_
sliding window width
Definition: saxencoder.h:206
SAXWord()
Definition: saxencoder.h:44
bool encode(double sample, char *outword, size_t outword_size)
Definition: saxencoder.cpp:187
Symbolic Aggregate approXimation encoder.
Definition: saxencoder.h:204
SAXWord(const char *str)
C-tor for unit-tests.
Definition: saxencoder.h:48
Definition: saxencoder.h:28