#include #include #include #include #include #include #include "grammar.h" #include "word.h" #include "loader.h" #include "FRC.h" extern const char* vnum; randutils::random_generator randgen; int complexity::gE = 0; using std::cin; using std::cout; using std::cerr; using std::clog; using std::endl; using std::string; using std::u32string; using namespace std::string_literals; /* * refParse: string -> vector * class reference * * chooseFrom: grammar, node, depth, maxdepth -> word * class word * class grammar * * applyRE: word -> word * * listAll: grammar, node, depth, maxdepth -> list * * formatWord: word, opts, formatStr -> string * * printPath: path -> string * readPath: string -> path * followPath: path -> word * class path * * main: * gen: grammar, node,, count, depth -> list * list: grammar, node, depth -> list * * * * loadGrammar: YAML::Node -> grammar * dumpGrammar: grammar -> YAML::Node * * toBNF: grammar, node -> BNFGrammar * fromBNF: BNFGrammar -> grammar * */ //For debugging std::string d_toUTF8(const icu::UnicodeString& s) { std::string res; return s.toUTF8String(res); } icu::UnicodeString d_fromUTF8(std::string s) { return icu::UnicodeString::fromUTF8(s); } int main(int argc, char** argv) { try { TCLAP::CmdLine cmd("Generate random words/sentences matching" " a context-free grammar", ' ', vnum); #ifdef _DEBUG clog<<"wordgen v"< file("file", "datafile", true, "", "filename", cmd); TCLAP::UnlabeledValueArg root("root", "Root node", true, "", "root", cmd); TCLAP::UnlabeledValueArg command("command", "command", false, "gen", "gen|list|diag", cmd); TCLAP::MultiArg channels("c", "channel", "print CHANNEL", false, "CHANNEL", cmd); TCLAP::SwitchArg ipaMode("p", "ipa", "print IPA transcriptions (-c ipa)", cmd); TCLAP::ValueArg depth("d", "depth", "maximum recursion depth", false, -1, "int", cmd); TCLAP::ValueArg expansions("e", "expansions", "maximum number of expansions (implies -d equal to -e) (defaults to depth^2)", false, -1, "int", cmd); TCLAP::SwitchArg HTMLMode("H", "html", "write output as HTML table", cmd); TCLAP::ValueArg num("n", "num", "number of words to generate", false, 1, "int", cmd); TCLAP::SwitchArg noVal("V", "noVal", "suppress implicit 'val' printing", cmd); TCLAP::SwitchArg quiet("q", "quiet", "don't print the header", cmd); TCLAP::ValueArg fstr("F", "fmt", "format string for printing words", false, "", "fmt_str", cmd); TCLAP::SwitchArg listZeros("0", "listZeros", "list: include 0-frequency values", cmd); TCLAP::SwitchArg dumpREs("", "regex", "Dump regular expressions", cmd); TCLAP::SwitchArg dumpNodes("", "nodes", "Dump switching nodes", cmd); TCLAP::SwitchArg testREs("", "retest", "Apply tranformations from CHANNELS to input", cmd); TCLAP::SwitchArg EbnfExport("", "bnf", "Export to EBNF (val only)", cmd); TCLAP::SwitchArg showPaths("P", "path", "Print paths (-c path)", cmd); TCLAP::SwitchArg keepHistory("K", "keepHistory", "Print intermediate tranformations (may be hard to read)", cmd); TCLAP::ValueArg KHSep("", "KHSep", "Separator between transformation stages", false, " → ", "string", cmd); TCLAP::ValueArg seed("r", "seed", "Random seed", false, "", "seed", cmd); cmd.parse(argc, argv); std::vector printChannels; if (!noVal.getValue()) { printChannels.push_back(u"val"); } if (ipaMode.getValue()) { printChannels.push_back(u"ipa"); } if (showPaths.getValue()) { printChannels.push_back(u"path"); } for (auto c : channels) { printChannels.push_back(fromUTF8(c)); } if (!seed.getValue().empty()) { // randgen.seed(); } int cdepth=-1, cexps=-1; if (depth.getValue() != -1 && expansions.getValue() != -1) { cdepth = depth.getValue(); cexps = expansions.getValue(); } else if (depth.getValue() != -1) { cdepth = depth.getValue(); cexps = cdepth * cdepth; } else if (expansions.getValue() != -1) { cdepth = cexps = expansions.getValue(); } else { cdepth = 24; cexps = 576; } constexpr const std::array files = { "grammar.cpp", "loader.cpp", "word.cpp", "path.cpp", "transform.cpp", }; auto Data = YAML::LoadFile(file.getValue()); try { Grammar g = loadGrammar(Data); Grammar::ParsedString tmpv{fromUTF8(root.getValue()), 0, false}; auto rootv = (tmpv.begin()->second) ? fromUTF8(root.getValue()) : fromUTF8("{"s + root.getValue() + "}") ; Grammar::Node arg{rootv, g}; #ifdef _DEBUG clog< e) { cerr<<"Regex error: "< e) { cerr<<"ICU error: " < e) { cerr<<"Regex error [load]: "< e) { cerr<<"ICU error: " <