#include "machines.h"
#include "tr.h"
#include "logger.h"
#include "error.h"
#include "utf8.h"

#include <algorithm>
#include <exception>
#include <iterator>
#include <string>
#include <string_view>
#include <utility>

namespace fsm {

/// @brief Expands a placeholder pattern, substituting `fill` for each field.
///
/// Rules, as implemented by the scanner below:
///  - `\x` outside a field emits `x` literally.
///  - `{{` emits `{` and `}}` emits `}`.
///  - `{}` and `{...}` emit `fill`. Characters inside the field are discarded,
///    except that a backslash-escaped character inside the field (after its
///    first character) is copied to the output.
///  - A lone `}` followed by any other character, or a `{` inside a field, is
///    reported through parse_error(ec::t_internal, pattern, position).
///  - Input that ends in the middle of an escape, field, or lone `}` is
///    silently dropped; no error is reported for it.
///
/// @param pattern Format text to scan.
/// @param fill    Text substituted for every field.
/// @return The expanded string.
std::string defaultPlaceholder(std::string_view pattern, std::string_view fill) {
    enum class scan {
        text,         // ordinary text
        escape,       // just saw '\' in ordinary text
        open_brace,   // just saw '{'
        close_brace,  // just saw '}' in ordinary text
        in_field,     // inside "{...", past the first character
        field_escape  // just saw '\' inside a field
    };

    scan mode = scan::text;
    std::string out;
    // Iterate by reference: parse_error is handed the address of the offending
    // character inside `pattern`.
    for (const char& ch : pattern) {
        switch (mode) {
        case scan::text:
            switch (ch) {
            case '\\':
                mode = scan::escape;
                break;
            case '{':
                mode = scan::open_brace;
                break;
            case '}':
                mode = scan::close_brace;
                break;
            default:
                out.push_back(ch);
                break;
            }
            break;
        case scan::escape:
            out.push_back(ch);
            mode = scan::text;
            break;
        case scan::field_escape:
            out.push_back(ch);
            mode = scan::in_field;
            break;
        case scan::open_brace:
            if (ch == '{') {
                // "{{" is an escaped literal brace.
                out.push_back(ch);
                mode = scan::text;
            } else if (ch == '}') {
                // "{}" is an empty field.
                out.append(fill);
                mode = scan::text;
            } else {
                // First character of a non-empty field; it is discarded.
                mode = scan::in_field;
            }
            break;
        case scan::close_brace:
            if (ch == '}') {
                // "}}" is an escaped literal brace.
                out.push_back(ch);
                mode = scan::text;
            } else {
                parse_error(ec::t_internal, pattern, &ch);
            }
            break;
        case scan::in_field:
            if (ch == '}') {
                out.append(fill);
                mode = scan::text;
            } else if (ch == '\\') {
                mode = scan::field_escape;
            } else if (ch == '{') {
                parse_error(ec::t_internal, pattern, &ch);
            }
            // Any other character inside a field is ignored.
            break;
        }
    }
    return out;
}

/// @brief Encodes a single code point as a UTF-8 string via utf8::append.
std::string toStr(char32_t c) {
    std::string c_asStr;
    utf8::append(c, std::back_inserter(c_asStr));
    return c_asStr;
}

/// @brief Returns a code-point iterator positioned at the start of `input`.
/// The iterator refers to `input`'s storage and carries the [begin, end) range.
auto utf8_begin(std::string_view input)
    -> utf8::iterator<std::string_view::const_iterator> {
    return utf8::iterator<std::string_view::const_iterator>(
        input.begin(), input.begin(), input.end());
}

/// @brief Returns a code-point iterator positioned at the end of `input`.
/// The iterator refers to `input`'s storage and carries the [begin, end) range.
auto utf8_end(std::string_view input)
    -> utf8::iterator<std::string_view::const_iterator> {
    return utf8::iterator<std::string_view::const_iterator>(
        input.end(), input.begin(), input.end());
}

/// @brief Reverses `input` code point by code point (not byte by byte).
///
/// Decodes once and re-encodes in reverse order, so the work is linear in the
/// input size instead of repeatedly inserting at the front of the result.
///
/// @param input UTF-8 text.
/// @return UTF-8 text with the code points in reverse order.
[[nodiscard]] std::string unicode_reverse(std::string_view input) {
    std::u32string code_points;
    utf8::utf8to32(input.begin(), input.end(), std::back_inserter(code_points));

    std::string ret;
    ret.reserve(input.size());
    for (auto it = code_points.rbegin(); it != code_points.rend(); ++it) {
        utf8::append(*it, std::back_inserter(ret));
    }
    return ret;
}

/// @brief Runs the machine over channel `ch` of `w` and returns the word data
///        with that channel replaced by the machine's output.
///
/// The channel text is decoded to code points (reversed first when `dir` has
/// `r_in` set), fed one code point at a time through state::step starting from
/// the state looked up with `states.at("S")`, and the final state's `suf()` is
/// appended. The output is reversed when `dir` has `r_out` set.
///
/// @param w  Word to transform; taken by value so its data can be modified and
///           moved out.
/// @param ch Channel of `w.data` to run the machine on.
/// @return `w.data`, with `w.data[ch]` holding the machine's output.
word_data_t state_machine::operator()(Word w, channelID ch) const {
    std::string& input = w.data[ch];
    log_info("fsm");

    std::u32string ustr;
    utf8::utf8to32(input.begin(), input.end(), std::back_inserter(ustr));
    if (dir & r_in) {
        std::reverse(ustr.begin(), ustr.end());
    }

    const state* s = &states.at("S");
    std::string output;
    // Initial guess of size; is likely to be at least this large so skip any
    // smaller allocations.
    output.reserve(ustr.size());
    for (auto c : ustr) {
        auto&& [out, dest] = s->step(c);
        output += out;
        if (!dest) {
            // Defensive: a step without a destination cannot be continued.
            log_err(+c);
            std::terminate();
        }
        s = dest;
    }
    output.append(s->suf());
    if (dir & r_out) {
        output = unicode_reverse(output);
    }

    // Store the result in the processed channel; previously the computed
    // output was discarded and the input data was returned unchanged.
    input = std::move(output);
    // w.data is a member of a by-value parameter, so it needs an explicit move.
    return std::move(w.data);
}

/// @brief Performs one transition for code point `c`.
///
/// Rules are tried in this order, and the first that applies wins:
///  1. `cmap`  - exact lookup of `c`; emits the action's format expanded with `c`.
///  2. `set`   - first entry whose `matches` accepts `c` per tr::match.
///  3. `map`   - first entry whose `in_set` contains `c`; emits the code point.
///  4. `match` - first entry whose `matches` contains `c`; emits the code point.
///  5. `def`   - default action, if present.
///  6. Otherwise emits `c` and goes to `ret`.
/// A null destination at any step means "stay in this state".
///
/// @param c Input code point.
/// @return The emitted UTF-8 text and the next state (never null).
std::pair<std::string, const state*> state::step(char32_t c) const {
    const std::string c_asStr = toStr(c);

    // A rule without an explicit destination keeps the machine in this state.
    const auto resolve = [this](const state* target) -> const state* {
        return target ? target : this;
    };
    const auto apply = [&](const state::action_t& action)
        -> std::pair<std::string, const state*> {
        return {defaultPlaceholder(action.format, c_asStr),
                resolve(action.dest)};
    };

    if (auto [it, found] = kblib::get_check(cmap, c); found) {
        return apply(it->second);
    }
    for (const auto& r : set) {
        if (tr::match(r.matches, c_asStr)) {
            return apply(r.action);
        }
    }
    for (const auto& m : map) {
        if (auto p = std::find(m.in_set.begin(), m.in_set.end(), c);
            p != m.in_set.end()) {
            // NOTE(review): this emits the matched input code point itself;
            // confirm no translated counterpart was intended here.
            return {toStr(*p), resolve(m.dest)};
        }
    }
    for (const auto& m : match) {
        if (std::find(m.matches.begin(), m.matches.end(), c) != m.matches.end()) {
            return {c_asStr, resolve(m.dest)};
        }
    }
    if (def) {
        return apply(*def);
    }
    return {c_asStr, resolve(ret)};
}

} // namespace fsm