My Project
interpolate.h
1 #ifndef INTERPOLATE_H_INCLUDED_
2 #define INTERPOLATE_H_INCLUDED_
3 
4 #include "containers.h"
5 #include "random.h"
6 #include "tstrings.h"
7 
8 #include <yaml-cpp/yaml.h>
9 
10 #include "cppcoro/generator.hpp"
11 #include <algorithm>
12 #include <experimental/coroutine>
13 #include <functional>
14 #include <limits>
15 #include <optional>
16 #include <string>
17 #include <unordered_map>
18 #include <vector>
19 
namespace details {
// Identity metafunction: exposes T as the nested alias `type`.
template <typename T>
struct tag {
    using type = T;
};

// Primary template, intentionally left undefined. It is only reached when the
// candidate list is exhausted, so asking for `::type` then fails to compile.
template <uint64_t V, typename...>
struct min_unsigned;

// Recursive step: if Bytes fits within sizeof(Head), select Head; otherwise
// continue the search through the remaining candidates.
template <uint64_t Bytes, typename Head, typename... Tail>
struct min_unsigned<Bytes, Head, Tail...>
    : std::conditional<(Bytes <= sizeof(Head)), tag<Head>,
                       min_unsigned<Bytes, Tail...>>::type {};

// First of uint8_t, uint16_t, uint32_t, uint64_t whose size in bytes is at
// least Bytes.
template <uint64_t Bytes>
using min_unsigned_t =
    typename min_unsigned<Bytes, uint8_t, uint16_t, uint32_t, uint64_t>::type;
} // namespace details
39 
40 template <std::size_t N>
41 struct str_prefix {
42  static_assert(N > 0, "str_prefix of size zero is disallowed");
43  char str[N];
44 
45  constexpr str_prefix() : str{} {}
46 
47  /* implicit */ constexpr str_prefix(std::string_view s) : str{} {
48  kblib::copy_n(s.begin(), std::min(s.size(), N), &str[0]);
49  }
50 
51  constexpr str_prefix reverse() const {
52  str_prefix reversed;
53  for (std::size_t i = 0; i < N; ++i) {
54  reversed.str[N - i - 1] = str[i];
55  }
56  return reversed;
57  }
58 
59  // I checked already and compilers can't inline this code, but it's needed
60  // for constexpr capability.
61  template <std::size_t shift = std::numeric_limits<unsigned char>::digits*(N -
62  1)>
63  constexpr auto to_uint() const {
64  using To = details::min_unsigned_t<N + shift / CHAR_BIT>;
65  static_assert(N <= sizeof(To));
66  if constexpr (N >= 2) {
67  return static_cast<To>(str[N - 1]) << shift |
68  static_cast<To>(
69  str_prefix<N - 1>(std::string_view{std::begin(str), N})
70  .template to_uint<
71  shift -
72  std::numeric_limits<unsigned char>::digits>());
73  } else {
74  return static_cast<To>(str[0]) << shift;
75  }
76  }
77 
78  // This function is much faster than the above but is not constexpr.
79  auto to_uint_fast() const {
80  using To = details::min_unsigned_t<N>;
81  static_assert(N <= sizeof(To));
82  To x;
83  if constexpr (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) {
84  std::memcpy(&x, &str[0], N);
85  } else {
86  auto r = reverse();
87  std::memcpy(&x, &r.str[0], N);
88  }
89  return x;
90  }
91 
92  constexpr char& operator[](std::size_t n) { return str[n]; }
93  constexpr const char& operator[](std::size_t n) const { return str[n]; }
94 
95  constexpr friend bool operator==(str_prefix lhs, str_prefix rhs) {
96  return kblib::equal(&lhs[0], &lhs[N - 1], &rhs[0]);
97  }
98  constexpr friend bool operator!=(str_prefix lhs, str_prefix rhs) {
99  return !(lhs == rhs);
100  }
101  constexpr friend bool operator<(str_prefix lesser, str_prefix greater) {
102  return kblib::lexicographical_compare(&lesser[0], &lesser[N - 1],
103  &greater[0], &greater[N - 1]);
104  }
105  constexpr friend bool operator>(str_prefix greater, str_prefix lesser) {
106  return lesser < greater;
107  }
108  constexpr friend bool operator<=(str_prefix lesser, str_prefix greater) {
109  return !(greater < lesser);
110  }
111  constexpr friend bool operator>=(str_prefix greater, str_prefix lesser) {
112  return !(greater < lesser);
113  }
114 };
115 
116 // In order for two channel names to collide, they must have a common prefix of
117 // at least 4 characters and hash to the same 32-bit value. Considering a file
118 // can be assumed to have less than ten channels all with distinct and short
119 // names, this is astronomically unlikely.
120 using channelID = uint64_t;
121 
122 constexpr inline channelID ch(std::string_view s) {
123  return (static_cast<uint64_t>(str_prefix<4>(s).to_uint_fast()) << 32u |
124  kblib::FNVa<std::uint32_t>(s));
125 }
126 
127 constexpr channelID operator""_ch(const char* s, std::size_t len) {
128  return (static_cast<uint64_t>(str_prefix<4>({s, len}).to_uint()) << 32u |
129  kblib::FNVa<std::uint32_t>(std::string_view{s, len}));
130 }
131 
// Recursive tree node: an integer `branch` plus an ordered list of child
// nodes of the same type.
// NOTE(review): presumably records which alternative was taken at each level
// of generation - confirm against the code that fills it in.
struct path_t {
    // Value-initialised so a default-constructed path_t never holds an
    // indeterminate value.
    int branch{};
    std::vector<path_t> children;
};
136 
137 using word_data_t = vmap<channelID, std::string, 8>;
138 
139 struct Word {
140  word_data_t data;
141  double freq;
142  path_t path;
143 
144  std::string& val() { return data["val"_ch]; }
145  const std::string& val() const { return data.at("val"_ch); }
146 };
147 
148 inline auto word_from_val(std::string v) -> word_data_t {
149  word_data_t ret;
150  ret.emplace("val"_ch, std::move(v));
151  return ret;
152 }
153 
154 std::string format(const word_data_t& data, std::string_view fmt,
155  const path_t* path = nullptr);
156 
157 inline std::string format(Word word, std::string_view fmt) {
158  // word.data.insert_or_assign("freq"_ch, std::to_string(word.freq));
159  word.data.insert_or_assign("freq"_ch, kblib::toStr(word.freq));
160  return format(word.data, fmt, &word.path);
161 }
162 
163 using string_transformer = std::function<word_data_t(Word, channelID)>;
164 
165 using replace_sequence = std::vector<string_transformer>;
166 
167 struct node {
168  using Alternative = vmap<channelID, bytecodes>;
169  std::vector<std::variant<double, bytecodes>> freqs;
170  std::vector<bytecodes> vals;
171  std::vector<Alternative> other_channels;
172  unsigned short declared_argc;
173  bool variadic;
174  std::string name;
175 };
176 
177 struct ReplaceStage {
178  vmap<channelID, replace_sequence> actions;
179  Word operator()(const Word&) const;
180 };
181 
182 class datafile;
183 
184 using flist_t = std::vector<double>;
185 using ilist_t = std::vector<std::pair<std::size_t, std::optional<double>>>;
186 
187 Word chooseFrom(
188  const datafile& data, RandomGenerator& rng, counters c, const node& n,
189  const std::vector<std::string>& args,
190  const std::variant<std::monostate, flist_t, ilist_t>& freq_override);
191 
192 cppcoro::generator<Word>
193 enumerate(const datafile& data, counters c, const node& n,
194  std::vector<std::string> args,
195  std::variant<std::monostate, flist_t, ilist_t> freq_override,
196  bool include_zeros);
197 
198 class datafile {
199  public:
200  datafile(YAML::Node data, RandomGenerator&);
201 
202  Word transform(Word, bool keephist, std::string_view hist_sep) const;
203 
204  Word
205  generate(RandomGenerator& rng, counters c, const node& node,
206  const std::vector<std::string>& args,
207  const std::variant<std::monostate, flist_t, ilist_t>& freq_override,
208  bool keephist, std::string_view sep) {
209  auto w = transform(
210  chooseFrom(*this, rng, c, node, args, std::move(freq_override)),
211  keephist, sep);
212  bm.variables.clear();
213  return w;
214  }
215 
216  // private:
217 
218  bytecode_machine bm;
219 
220  static std::tuple<std::string, int, bool> parse_nodename(std::string_view);
221 
222  static std::unordered_map<channelID, std::string>& ch_db();
223 
224  word_data_t channelNames;
225  vmap<channelID, std::shared_ptr<datafile>> externFiles;
226  small_vector<ReplaceStage, 1> replace;
227  std::optional<std::string> startNode;
228 
229  vmap<std::string, std::vector<node>, 1> nodes;
230 
231  template <typename Datafile>
232  static auto find_node_impl(Datafile& This, std::string_view name);
233  template <typename Map>
234  static auto find_node_impl(Map& nodes, std::string_view name, int argc)
235  -> kblib::copy_const_t<Map, node>* {
236  auto nlist = nodes.find(name);
237 
238  if (nlist == nodes.end()) {
239  return nullptr;
240  }
241  for (auto&& n : nlist->second) {
242  if (n.declared_argc == argc ||
243  (n.variadic && n.declared_argc <= argc)) {
244  return &n;
245  }
246  }
247  return nullptr;
248  }
249 
250  node* find_node(std::string_view name);
251  const node* find_node(std::string_view name) const;
252  node* find_node(std::string_view name, int argc) {
253  return find_node_impl(nodes, name, argc);
254  }
255  const node* find_node(std::string_view name, int argc) const {
256  return find_node_impl(nodes, name, argc);
257  }
258 
259  datafile* lookup_domain(std::string_view name) {
260  if (name.empty()) {
261  return this;
262  }
263 
264  if (auto check = kblib::get_check(externFiles, ch(name))) {
265  return check->second.get();
266  }
267  return nullptr;
268 
269  // auto it = externFiles.find(ch(name));
270  // if (it == externFiles.end()) {
271  // return nullptr;
272  // }
273  // return it->second.get();
274  }
275  const datafile* lookup_domain(std::string_view name) const {
276  if (name.empty()) {
277  return this;
278  }
279  if (auto check = kblib::get_check(externFiles, ch(name))) {
280  return check->second.get();
281  }
282  return nullptr;
283  // auto it = externFiles.find(ch(name));
284  // if (it == externFiles.end()) {
285  // return nullptr;
286  // }
287  // return it->second.get();
288  }
289 };
290 
291 struct noderef {
292  std::optional<const datafile*> source;
293  std::string name;
294  std::vector<std::string> args;
295  std::variant<std::monostate, flist_t, ilist_t> freq_override;
296 };
297 
298 // After eval
299 [[nodiscard]] cppcoro::generator<
300  std::variant<std::string, noderef, wordgen_error>>
301 fparse(const datafile&, const std::string& s);
302 
303 small_vector<double, 16> freqs_of(const bytecode_machine& bm, const node& n,
304  const std::vector<std::string>& args,
305  const counters& c, int argc);
306 
// Token produced by basic_parser: some leading text, an optional identifier
// and a modifier string.
// NOTE(review): exact meaning of each field should be confirmed against the
// parser's definition.
struct basic_parse_token {
    std::string pre_text;
    std::optional<std::string> id;
    std::string mods;
};
312 
313 cppcoro::generator<basic_parse_token> basic_parser(std::string_view input);
314 
315 #endif // INTERPOLATE_H_INCLUDED_
Definition: interpolate.h:167
Definition: interpolate.h:41
Definition: interpolate.h:28
Definition: interpolate.h:198
Definition: interpolate.h:20
Definition: interpolate.h:177
Definition: interpolate.h:139
Definition: interpolate.h:307
Definition: interpolate.h:22
Definition: interpolate.h:132
Definition: tstrings.h:175
Definition: tstrings.h:196
Definition: interpolate.h:291