#ifndef GRAMMAR_H_INCLUDED #define GRAMMAR_H_INCLUDED #include #include // #include // #include #include #include #include #include #include #include // #include #include "pcg/pcg_random.hpp" #include "randutils.hpp" #include #include #include #include "FRC.h" #include "path.h" class Word; class TransformatedWord; namespace YAML { class Node; }; struct complexity { const int maxDepth, maxExpansions; int depth, *expansions; static int gE; //Increments *this* complexity's expansions and the copy's depth constexpr complexity incr() {return {maxDepth, maxExpansions, depth+1, expansions ? (&(++(*expansions))) : expansions};} constexpr bool operator!() const { if (expansions && (*expansions >= maxExpansions)) { std::clog<<"wordgen: Expansion limit exceeded."<= maxDepth) { std::clog<<"wordgen: Recursion depth limit exceeded."<= maxDepth || (expansions && (*expansions >= maxExpansions)); } constexpr operator bool() const {return !!*this;} constexpr complexity(int md, int me, int d=0, int* e=&gE) : maxDepth(md), maxExpansions(me), depth(d), expansions(e) {;} constexpr complexity(int c, int me, int* e) : maxDepth(c), maxExpansions(me), depth(0), expansions(e) {;} constexpr complexity(int c=24, int* e=&gE) : maxDepth(c), maxExpansions(c*c), depth(0), expansions(e) {;} }; static constexpr complexity DEFAULT_COMPLEXITY; class Grammar { public: friend Grammar loadGrammar(YAML::Node); Grammar() : channelNames({u"val", u"freq", u"path" }), channelDescs({u"Words", u"Frequencies", u"Paths"}), _nullNode(nodeData{}, *this) {;} using channelID = unsigned short; //Inserts if not present, description = channel channelID chI(const icu::UnicodeString& channel); //Throws if not present channelID chI(const icu::UnicodeString& channel) const; //Throws if not present const icu::UnicodeString& chN(channelID channel) const; //Inserts and updates description channelID nameChannel(const icu::UnicodeString& channel, const icu::UnicodeString& desc); //Throws if not present channelID nameChannel(channelID channel, const icu::UnicodeString& desc); std::string printAllChannelNames() const; using ArgsType = std::vector; class Node; Word genWord(Node const* node, complexity c) const; class ArgRef { public: ArgRef(const std::tuple& val) : ArgRef(std::get<0>(val), std::get<1>(val), std::get<2>(val)) {;} ArgRef(const icu::UnicodeString&, int argC=0, bool argV=false); icu::UnicodeString operator()(const ArgsType& args) const { return call(args); } private: std::function call; }; using ArgStr = std::vector>; using freqlist = std::vector; using indexlist = std::vector>; static ArgStr parseArgs(const icu::UnicodeString&, int argC, bool argV); static icu::UnicodeString applyArgs(const ArgStr&, const ArgsType&); class ParsedString { public: ParsedString(); ParsedString(const icu::UnicodeString&, int argC=0, bool argV=false); ParsedString sub(const ArgsType&, int argC, bool argV) const; //iterator functions auto begin() noexcept {return elements.begin();} auto end() noexcept {return elements.end();} auto begin() const noexcept {return elements.cbegin();} auto end() const noexcept {return elements.cend();} auto cbegin() const noexcept {return elements.cbegin();} auto cend() const noexcept {return elements.cend();} auto rbegin() noexcept {return elements.rbegin();} auto rend() noexcept {return elements.rend();} auto rbegin() const noexcept {return elements.crbegin();} auto rend() const noexcept {return elements.crend();} auto crbegin() const noexcept {return elements.crbegin();} auto crend() const noexcept {return elements.crend();} auto size() const noexcept {return elements.size();} auto empty() const noexcept {return elements.empty();} //If this ParsedString represents no content bool null() const noexcept {return elements.empty() || !(elements[0].first.length() || elements[0].second);} class NodeRef { public: NodeRef() : isRef(false), argCount(0), varArgs(false) {;} NodeRef(const icu::UnicodeString& val, int argC=0, bool argV=false); ~NodeRef() = default; NodeRef sub(const ArgsType&) const; operator bool() const noexcept; bool hasArgs() const noexcept {return boost::apply_visitor([](auto a){return !a.empty();}, args);} Path firstOf(const Grammar&, bool, complexity) const; Path firstOf(const Grammar&, bool, const ArgsType&, complexity) const; Word chooseOf(const Grammar&, Path*, complexity) const; Word chooseOf(const Grammar&, const ArgsType&, Path*, complexity) const; const icu::UnicodeString& getName() const; icu::UnicodeString getName(const ArgsType&) const; enum refstyle {nr_None, nr_ilist, nr_flist}; private: NodeRef(icu::UnicodeString&& n, ArgsType&& a, freqlist&& f, indexlist&& i) : name(n), args(a), flist(f), ilist(i) {;} refstyle checkRefStyle() const; bool isRef; int argCount; bool varArgs; boost::variant name; boost::variant args; boost::variant flist; boost::variant ilist; static std::logic_error _needArgs() { return std::logic_error{"nodeRef called without args"}; } }; private: std::vector> elements; int argCount; bool varArgs; }; using nodeData = std::vector>; class aNode { protected: }; class tNode; class wNode; class Node : public aNode { public: Node(const nodeData&, Grammar&); Node(const icu::UnicodeString& v, Grammar& g) : source(g), countsAs(1), branches(1, Branch{v, 1.0, {std::make_pair(g.chI(u"val"), v)}}) {;} Word selectFrom(Path* select, complexity c) const; Path first(complexity c, bool incZero=false) const; wNode cloneWithFreqList(freqlist flist) const; wNode cloneWithIndexList(indexlist ilist) const; Word raw(unsigned b) const; friend class tNode; friend class wNode; struct Branch { ParsedString val; double freq; std::map other_channels; }; protected: Grammar& source; int countsAs; std::vector branches; }; const Node& nullNode() const { return _nullNode; } class cNode : public aNode { public: cNode(const std::map&, int, Grammar&); Word selectFrom(Path* select, complexity c) const; Path first(complexity c) const; //No wNode functions; get the corresponding Node. Word raw() const; protected: Grammar& source; int pathNum; Node::Branch b; }; class wNode : public aNode { public: friend class Node; Word selectFrom(Path* select, complexity c) const; Path first(complexity c, bool incZero=false) const; ~wNode(); private: wNode(Node const * f, const std::vector& fl) : orig(f), which(_fl), flist(fl) {if (!orig) throw nullptr;} wNode(Node const * f, const std::vector>& il) : orig(f), which(_il), ilist(il) {if (!orig) throw nullptr;} Node const * const orig; enum {_fl, _il} which; union { std::vector flist; std::vector> ilist; }; }; class tNode : public aNode { public: tNode(const nodeData& data, Grammar& g, int c, bool vararg=false); Node applyArgs(const ArgsType& args) const; Word selectFrom(const ArgsType& args, Path* select, complexity c) const; Path first(const ArgsType& args, complexity c, bool incZero=false) const; struct Branch { ParsedString val; ArgStr freq; std::map other_channels; }; protected: Grammar& source; int argc; bool varargs; std::vector branches; }; TransformatedWord applyTransforms(const Word& w) const; class Transformer { public: virtual icu::UnicodeString operator()(const icu::UnicodeString&, const Word&) const; virtual ~Transformer() {;} }; class assigner : public Transformer { public: assigner(const icu::UnicodeString&, Grammar*); icu::UnicodeString operator()(const icu::UnicodeString&, const Word&) const override; private: ParsedString assign; Grammar* g; }; class regexer : public Transformer { public: // regexer(std::vector r) : rules(r) {;} template regexer(V&& r) : rules(std::forward(r)) {;} icu::UnicodeString operator()(const icu::UnicodeString&, const Word&) const override; struct regex_type { regex_type() = default; //Retains and becomes owner of m regex_type(icu::RegexPattern* m, const icu::UnicodeString& r) : match(m), replace(r) {;} regex_type(const UnicodeString& regex, UParseError& pe, UErrorCode& status, const icu::UnicodeString& r) : match(icu::RegexPattern::compile(regex, pe, status)), replace(r) {;} std::unique_ptr match; icu::UnicodeString replace; }; private: std::vector rules; }; /* class regexer2 : public Transformer { public: // regexer(std::vector r) : rules(r) {;} template regexer2(V&& r) : rules(std::forward(r)) {;} icu::UnicodeString operator()(const icu::UnicodeString&, const Word&) const override; struct regex_type { regex_type() = default; //Retains and becomes owner of m regex_type(const std::u32string& m, const std::u32string& r) : match(m), replace(r) {;} regex_type(const UnicodeString& regex, const icu::UnicodeString& r) : match(toUTF32(regex)), replace(toUTF32(r)) {;} std::basic_regex> match; std::u32string replace; }; private: std::vector rules; };*/ class state_machine : public Transformer { public: icu::UnicodeString operator()(const icu::UnicodeString&, const Word&) const override; friend std::unique_ptr extractStates(const YAML::Node& n); class State { public: State() : hasDefault(false) {;} std::pair operator()(UChar32 c) const; const icu::UnicodeString& end() const {return endRule;} friend std::unique_ptr extractStates(const YAML::Node& n); static icu::UnicodeString sub(const ParsedString& replace, UChar32 c); //No-op interface class class Rule { public: Rule(const icu::UnicodeString& state=u"") : gotoState(state) {;} std::pair operator()(UChar32 c) const { return {c,gotoState}; } protected: icu::UnicodeString gotoState; }; class CharRule : public virtual Rule { public: CharRule(const icu::UnicodeString& out=u"{}", const icu::UnicodeString& state=u"") : Rule(state), replace(out) {;} std::pair operator()(UChar32 c) const { return {sub(replace, c), gotoState}; } protected: ParsedString replace; }; class MatchRule : public virtual Rule { public: MatchRule(const icu::UnicodeString& set1, const icu::UnicodeString& state=u"") : Rule(state), matchSet(set1) {;} std::pair operator()(UChar32 c) const; bool match(UChar32 c) const; protected: icu::UnicodeString matchSet; }; class SetRule : public CharRule, public MatchRule { public: SetRule(const icu::UnicodeString& set1, const icu::UnicodeString& out, const icu::UnicodeString& state=u"") : Rule(state), CharRule(out, state), MatchRule(set1, state) {;} std::pair operator()(UChar32 c) const; }; class MapRule : public MatchRule { public: MapRule(const icu::UnicodeString& set1, const icu::UnicodeString& set2, const icu::UnicodeString& state=u"") : Rule(state), MatchRule(set1), repSet(set2) {;} std::pair operator()(UChar32 c) const; protected: icu::UnicodeString repSet; }; private: CharRule defaultRule; bool hasDefault; std::map charRules; std::vector setRules; std::vector matchRules; std::vector mapRules; icu::UnicodeString returnState; icu::UnicodeString endRule; }; enum dir : uint8_t { Forward = 0, InRev = 1, OutRev = 2, InOutRev = InRev | OutRev, }; state_machine() =default; state_machine(const std::map& s, dir d=dir::Forward) : states(s), tapeDirection(d) {;} private: std::map states; dir tapeDirection; }; private: std::map nodes; std::map cnodes; std::map, tNode> tnodes; // std::map nodes; // std::map, tNode>> tNodes; // std::map channels; std::map>> transform_stages; // boost::bimap< // boost::bimaps::vector_of, // icu::UnicodeString, // boost::bimaps::unconstrained_set_of_relation // > channelNames; std::vector channelNames; std::vector channelDescs; Node _nullNode; }; namespace AST { template struct Node { virtual const char* getType() const {return "generic";} }; //Dummy definition template struct text{}; template <> struct text : public Node { virtual const char* getType() const {return "text";} std::vector>> value; }; template <> struct text : public Node, public text { virtual const char* getType() const {return "text";} icu::UnicodeString value; }; template struct number : public Node { virtual const char* getType() const {return "float";} double value; }; template <> struct number : public Node { virtual const char* getType() const {return "float";} std::vector>> value; }; template struct inumber : public Node { virtual const char* getType() const {return "int";} long long value; }; template <> struct inumber : public Node { virtual const char* getType() const {return "int";} std::vector>> value; }; template struct iList : public Node { virtual const char* getType() const {return "iList";} std::vector>> value; }; template struct fList : public Node { virtual const char* getType() const {return "fList";} std::vector>,std::unique_ptr>>> value; }; template struct NodeRef : public Node { virtual const char* getType() const {return "NodeRef";} std::string sourceFile; std::unique_ptr> name;//includes args std::unique_ptr> ilist; std::unique_ptr> flist; }; template struct ArgRef{}; template<> struct ArgRef : public text { virtual const char* getType() const {return "ArgRefConst";} enum class funName { null=0, len, plus, minus, times, divide, select, which, num, gt, lt, eq, concat, repeat, math, err=-1, } name; icu::UnicodeString oname; std::unique_ptr> params; }; template<> struct ArgRef : public ArgRef { virtual const char* getType() const {return "ArgRef";} std::unique_ptr simple; }; } #endif