// Fixed-size string prefix type (str_prefix) and the helper metafunctions it
// relies on.
//
// details::min_unsigned walks a list of candidate types and selects the first
// candidate T for which V <= sizeof(T); when no candidate is left only the
// undefined primary template remains, so instantiation fails.
//
// str_prefix stores exactly N chars: the array is zero-filled and then
// overwritten with at most N characters of the source string_view.
// reverse() returns a copy with the chars in the opposite order.
// to_uint() packs the chars into an unsigned integer with str[0] in the least
// significant position; it is usable in constant expressions and defers to
// to_uint_fast() when not constant-evaluated.
#ifndef INTERPOLATE_H_INCLUDED_ #define INTERPOLATE_H_INCLUDED_ #include "containers.h" #include "random.h" #include "tstrings.h" #include "asyncpp/generator.h" #include #include #include #include #include #include #include #include #include namespace details { template struct tag { using type = T; }; // base case: just fail template struct min_unsigned; // recursive case: check using numeric_limits template struct min_unsigned : std::conditional_t<(V <= sizeof(T)), tag, min_unsigned> {}; template using min_unsigned_t = typename details::min_unsigned::type; } // namespace details template struct [[nodiscard]] str_prefix { static_assert(N > 0, "str_prefix of size zero is disallowed"); char str[N]; constexpr str_prefix() : str{} {} /* implicit */ constexpr str_prefix(std::string_view s) : str{} { kblib::copy_n(s.begin(), std::min(s.size(), N), &str[0]); } constexpr auto reverse() const -> str_prefix { str_prefix reversed; for (std::size_t i = 0; i < N; ++i) { reversed.str[N - i - 1] = str[i]; } return reversed; } // I checked already and compilers can't inline this code, but it's needed // for constexpr capability. template ::digits*(N - 1)> constexpr auto to_uint() const noexcept { using To = details::min_unsigned_t; if (not std::is_constant_evaluated()) { return +static_cast(to_uint_fast()); } static_assert(N <= sizeof(To)); if constexpr (N >= 2) { return static_cast(str[N - 1]) << shift | static_cast( str_prefix(std::string_view{std::begin(str), N}) .template to_uint< shift - std::numeric_limits::digits>()); } else { return static_cast(str[0]) << shift; } } // This function is much faster than the above but is not constexpr. 
// x is zero-initialized because memcpy writes only N bytes and N may be
// smaller than sizeof(To); without it the remaining bytes would be
// indeterminate. On non-little-endian targets the reversed chars are copied
// into the last N bytes of x so that str[0] still ends up least significant,
// matching to_uint().
// The comparison operators further down pass &x[0] + N as the end pointer so
// that all N chars take part in the comparison (the range is half-open).
auto to_uint_fast() const noexcept { using To = details::min_unsigned_t; static_assert(N <= sizeof(To)); To x{}; if constexpr (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) { std::memcpy(&x, &str[0], N); } else { auto r = reverse(); std::memcpy(reinterpret_cast<unsigned char*>(&x) + (sizeof(To) - N), &r.str[0], N); } return x; } [[nodiscard]] constexpr auto operator[](std::size_t n) noexcept -> char& { return str[n]; } [[nodiscard]] constexpr auto operator[](std::size_t n) const noexcept -> const char& { return str[n]; } [[nodiscard]] constexpr friend auto operator==(str_prefix lhs, str_prefix rhs) noexcept -> bool { return kblib::equal(&lhs[0], &lhs[0] + N, &rhs[0]); } [[nodiscard]] constexpr friend auto operator!=(str_prefix lhs, str_prefix rhs) noexcept -> bool { return not (lhs == rhs); } [[nodiscard]] constexpr friend auto operator<(str_prefix lesser, str_prefix greater) noexcept -> bool { return kblib::lexicographical_compare(&lesser[0], &lesser[0] + N, &greater[0], &greater[0] + N); } [[nodiscard]] constexpr friend auto operator>(str_prefix greater, str_prefix lesser) noexcept -> bool { return lesser < greater; } [[nodiscard]] constexpr friend auto operator<=(str_prefix lesser, str_prefix greater) noexcept -> bool { return not (greater < lesser); } [[nodiscard]] constexpr friend auto operator>=(str_prefix greater, str_prefix lesser) noexcept -> bool { return not (greater < lesser); } }; // In order for two channel names to collide, they must have a common prefix of // at least 4 characters and hash to the same 32-bit value. Considering a file // can be assumed to have less than ten channels all with distinct and short // names, this is astronomically unlikely. 
// Channel identifiers, word data, node descriptions and the datafile class.
//
// ch(s) / operator""_ch: build a channelID by taking str_prefix<4> of the
//   name, converting it with to_uint(), shifting the result left by 32 bits and
//   OR-ing in kblib::FNVa of the whole name (presumably a 32-bit hash — TODO
//   confirm against kblib). Both functions evaluate the same expression.
// Word: a word_data_t map plus a frequency and a path_t; val() returns the
//   entry stored under "val"_ch (operator[] in the non-const overload, at() in
//   the const overload).
// word_from_val(v): returns a word_data_t holding v under "val"_ch.
// format(Word, fmt): stores kblib::toStr(word.freq) under "freq"_ch in its
//   by-value copy of the word, then forwards to the word_data_t overload with a
//   pointer to word.path.
// datafile::generate: calls chooseFrom(*this, ...), passes the result through
//   transform(), clears bm.variables, and returns the transformed Word.
// datafile::find_node_impl(Map&, name, argc): looks name up in the map and
//   returns a pointer to the first node in the matching list whose
//   declared_argc equals argc, or which is variadic with declared_argc <= argc;
//   returns nullptr when the name is absent or no node matches.
// datafile::lookup_domain(name): returns this for an empty name; otherwise
//   looks ch(name) up in externFiles via kblib::get_check and returns the
//   stored pointer, or nullptr when there is no such entry.
// NOTE(review): the "// private:" marker is commented out, so every member of
//   datafile following "public:" is publicly accessible.
using channelID = uint64_t; constexpr inline auto ch(std::string_view s) noexcept -> channelID { return (static_cast(str_prefix<4>(s).to_uint()) << 32u | kblib::FNVa(s)); } constexpr auto operator""_ch(const char* s, std::size_t len) noexcept -> channelID { return (static_cast(str_prefix<4>({s, len}).to_uint()) << 32u | kblib::FNVa(std::string_view{s, len})); } struct path_t { int branch; std::vector children; }; using word_data_t = vmap; struct Word { word_data_t data; double freq; path_t path; auto val() -> std::string& { return data["val"_ch]; } auto val() const -> const std::string& { return data.at("val"_ch); } }; [[nodiscard]] inline auto word_from_val(std::string v) -> word_data_t { word_data_t ret; ret.emplace("val"_ch, std::move(v)); return ret; } [[nodiscard]] auto format(const word_data_t& data, std::string_view fmt, const path_t* path = nullptr) -> std::string; [[nodiscard]] inline auto format(Word word, std::string_view fmt) -> std::string { // word.data.insert_or_assign("freq"_ch, std::to_string(word.freq)); word.data.insert_or_assign("freq"_ch, kblib::toStr(word.freq)); return format(word.data, fmt, &word.path); } using string_transformer = std::function; using replace_sequence = std::vector; struct node { using Alternative = vmap; std::vector> freqs; std::vector vals; std::vector other_channels; unsigned short declared_argc; bool variadic; std::string name; }; struct ReplaceStage { vmap actions; [[nodiscard]] auto operator()(const Word&) const -> Word; }; class datafile; using flist_t = std::vector; using ilist_t = std::vector>>; [[nodiscard]] auto chooseFrom( const datafile& data, RandomGenerator& rng, counters c, const node& n, const std::vector& args, const std::variant& freq_override) -> Word; [[nodiscard]] auto enumerate( const datafile& data, counters c, const node& n, std::vector args, std::variant freq_override, bool include_zeros) -> asyncpp::generator; class datafile { public: datafile(YAML::Node data, RandomGenerator&); [[nodiscard]] auto 
transform(Word, bool keephist, std::string_view hist_sep) const -> Word; [[nodiscard]] auto generate( RandomGenerator& rng, counters c, const node& node, const std::vector& args, const std::variant& freq_override, bool keephist, std::string_view sep) -> Word { auto w = transform(chooseFrom(*this, rng, c, node, args, freq_override), keephist, sep); bm.variables.clear(); return w; } // private: template_machine bm; [[nodiscard]] static auto parse_nodename(std::string_view) -> std::tuple; [[nodiscard]] static auto ch_db() -> std::unordered_map&; word_data_t channelNames; vmap> externFiles; small_vector replace; std::optional startNode; vmap, 1> nodes; template [[nodiscard]] static auto find_node_impl(Datafile& This, std::string_view name); template [[nodiscard]] static auto find_node_impl(Map& nodes, std::string_view name, int argc) -> kblib::copy_const_t* { auto nlist = nodes.find(name); if (nlist == nodes.end()) { return nullptr; } for (auto&& n : nlist->second) { if (n.declared_argc == argc or (n.variadic and n.declared_argc <= argc)) { return &n; } } return nullptr; } [[nodiscard]] auto find_node(std::string_view name) -> node*; [[nodiscard]] auto find_node(std::string_view name) const -> const node*; [[nodiscard]] auto find_node(std::string_view name, int argc) -> node* { return find_node_impl(nodes, name, argc); } [[nodiscard]] auto find_node(std::string_view name, int argc) const -> const node* { return find_node_impl(nodes, name, argc); } [[nodiscard]] auto lookup_domain(std::string_view name) noexcept -> datafile* { if (name.empty()) { return this; } if (auto check = kblib::get_check(externFiles, ch(name))) { return check->second.get(); } return nullptr; // auto it = externFiles.find(ch(name)); // if (it == externFiles.end()) { // return nullptr; // } // return it->second.get(); } [[nodiscard]] auto lookup_domain(std::string_view name) const noexcept -> const datafile* { if (name.empty()) { return this; } if (auto check = kblib::get_check(externFiles, 
ch(name))) { return check->second.get(); } return nullptr; // auto it = externFiles.find(ch(name)); // if (it == externFiles.end()) { // return nullptr; // } // return it->second.get(); } }; struct noderef { std::optional source; std::string name; std::vector args; std::variant freq_override; }; // After eval [[nodiscard]] auto fparse(const datafile&, const std::string& s) -> asyncpp::generator>; [[nodiscard]] auto freqs_of(const template_machine& bm, const node& n, const std::vector& args, const counters& c, int argc) -> small_vector; struct basic_parse_token { std::string pre_text; std::optional id; std::string mods; }; [[nodiscard]] auto basic_parser(std::string_view input) -> asyncpp::generator; #endif // INTERPOLATE_H_INCLUDED_