#include #include #include #include #include #include #include #include "interpolate.h" #include "error.h" #include "kblib/kblib.h" #include "fda.h" #include "logger.h" #include #include "randutils.hpp" #include "srell.hpp" std::string toUTF8(const std::u16string&) {return {};} std::u16string fromUTF8(const std::string&) {return {};} extern const char* vnum; randutils::random_generator randgen; using std::cin; using std::cout; using std::cerr; using std::clog; using std::string; using std::u16string; using std::u32string; using namespace std::string_literals; /* * refParse: string -> vector * class reference * * chooseFrom: grammar, node, depth, maxdepth -> word * class word * class grammar * * applyRE: word -> word * * listAll: grammar, node, depth, maxdepth -> list * * formatWord: word, opts, formatStr -> string * * printPath: path -> string * readPath: string -> path * followPath: path -> word * class path * * main: * gen: grammar, node,, count, depth -> list * list: grammar, node, depth -> list * * * * loadGrammar: YAML::Node -> grammar * dumpGrammar: grammar -> YAML::Node * * toBNF: grammar, node -> BNFGrammar * fromBNF: BNFGrammar -> grammar * */ bool test_file(std::istream& tfile, const bytecode_machine& BM) { std::string cTestCase; std::string cToken; while (tfile) { std::getline(tfile, cTestCase); cout<(code)) <<"\n"; return false; } else if (cToken != got) { cerr<<"Test case failed: (mode 3)" <<"\nCase: "<(code)) <<"\n"; cerr<<"Extra information:\n" <(help_text), sizeof(help_text)}; } int main(int argc, char** argv) { // c_test_func(); try { log_info("wordgen v", vnum); TCLAP::CmdLine cmd("Generate random words/sentences matching" " a context-free grammar", ' ', vnum); TCLAP::UnlabeledValueArg file("file", "The datafile to use, or 'help', which will print extended information.", true, "", "filename|help", cmd); TCLAP::UnlabeledValueArg root("root", "Root node", true, "", "root", cmd); TCLAP::ValuesConstraint commands( "gen", "list", "diag", "show", "xform" ); TCLAP::UnlabeledValueArg command("command", "command", false, "gen", &commands, cmd); TCLAP::MultiArg channels("c", "channel", "print CHANNEL", false, "CHANNEL", cmd); TCLAP::SwitchArg ipaMode("p", "ipa", "print IPA transcriptions (-c ipa)", cmd); TCLAP::SwitchArg allChannels("C", "printAll", "print all channels present (overrides -c, -p, -V)", cmd); TCLAP::ValueArg depth("d", "depth", "maximum recursion depth", false, -1, "int", cmd); TCLAP::ValueArg expansions("e", "expansions", "maximum number of expansions (implies -d equal to -e) (defaults to depth^2)", false, -1, "int", cmd); TCLAP::SwitchArg HTMLMode("H", "html", "write output as HTML table", cmd); TCLAP::ValueArg num("n", "num", "number of words to generate", false, 1, "int", cmd); TCLAP::SwitchArg noVal("V", "noVal", "suppress implicit 'val' printing", cmd); TCLAP::SwitchArg quiet("q", "quiet", "don't print the header", cmd); TCLAP::SwitchArg silent("Q", "silent", "don't print anything except diagnostics", cmd); TCLAP::ValueArg fstr("F", "fmt", "format string for printing words", false, "", "fmt_str", cmd); TCLAP::ValuesConstraint loglevels( "none", "err", "error", "warn", "notice", "info", "debug" ); TCLAP::ValueArg loglevel("l", "loglevel", "Set the logging level. Defaults to 'notice'.", false, "notice", &loglevels, cmd); TCLAP::SwitchArg listZeros("0", "listZeros", "list: include 0-frequency values", cmd); TCLAP::SwitchArg dumpREs("", "regex", "diag: Dump regular expressions", cmd); TCLAP::SwitchArg dumpNodes("", "nodes", "diag: Dump switching nodes", cmd); TCLAP::SwitchArg testREs("", "retest", "diag: Apply tranformations from CHANNELS to input", cmd); TCLAP::SwitchArg EbnfExport("", "bnf", "diag: Export to EBNF (val only)", cmd); TCLAP::MultiArg testParser("", "tstr", "diag: Test template parser with file(s)", false, "filename", cmd); TCLAP::SwitchArg dumpfmt("", "dumpfmt", "diag: Show the effective format string.", cmd); TCLAP::SwitchArg showPaths("P", "path", "Print paths (-c path)", cmd); TCLAP::SwitchArg keepHistory("K", "keepHistory", "Print intermediate tranformations (may be hard to read)", cmd); TCLAP::ValueArg KHSep("", "KHSep", "Separator between transformation stages", false, u8" → ", "string", cmd); TCLAP::ValueArg seed("r", "seed", "Random seed", false, "", "seed", cmd); cmd.parse(argc, argv); if (file.getValue() == "help") { printHelpText(); return EXIT_SUCCESS; } std::vector printChannels; if (!noVal.getValue()) { printChannels.push_back("val"); } if (ipaMode.getValue()) { printChannels.push_back("ipa"); } if (showPaths.getValue()) { printChannels.push_back("path"); } for (auto c : channels) { printChannels.push_back(c); } randutils::seed_seq_fe128 seed_data{{0}}; if (!seed.getValue().empty()) { seed_data.seed(seed.getValue().begin(), seed.getValue().end()); } else { [&](auto& seed){ std::array data; seed.param(data.begin()); seed_data.seed(data.begin(), data.end()); }(randutils::auto_seed_128{}.base()); } set_log_level([&]{ using namespace kblib::literals; switch (kblib::FNV32a(loglevel.getValue())) { case "none"_fnv32: return log_level::silent; case "err"_fnv32: case "error"_fnv32: return log_level::err; case "warn"_fnv32: return log_level::warn; case "notice"_fnv32: return log_level::notice; case "info"_fnv32: return log_level::info; case "debug"_fnv32: return log_level::debug; } }()); int cdepth=-1, cexps=-1; if (depth.getValue() != -1 && expansions.getValue() != -1) { cdepth = depth.getValue(); cexps = expansions.getValue(); } else if (depth.getValue() != -1) { cdepth = depth.getValue(); cexps = cdepth * cdepth; } else if (expansions.getValue() != -1) { cdepth = cexps = expansions.getValue(); } else { cdepth = 24; cexps = 576; } constexpr const std::array files = { "grammar.cpp", "loader.cpp", "word.cpp", "path.cpp", "transform.cpp", }; // std::ifstream retest(file.getValue()); // testRE(retest); try { string fmt = [&]{ if (allChannels.getValue()) { if (HTMLMode.getValue()) { return "{%all%!h}{path!h}"s; } return "{%all%}\t{path}"s; } else if (fstr.getValue().empty()) { std::string mods = [&]{ if (HTMLMode.getValue()) { return "!h"; } else { return ""; } }(); auto base_str = std::accumulate(printChannels.begin(), printChannels.end(), std::string{}, [&, first = true](std::string out, const std::string& ch) mutable { if (first || HTMLMode.getValue()) { first = false; return kblib::concat(out, '{', ch, mods, '}'); } else { return kblib::concat(out, "\\t{", ch, mods, '}'); } }); if (HTMLMode.getValue()) { return kblib::concat("", base_str, ""); } return base_str; } else { if (HTMLMode.getValue()) { return kblib::concat("", kblib::html_encode(fstr.getValue()), ""); } return fstr.getValue(); } }(); // cout<<"Using format "<> kblib::get_line(regex)) { srell::u8regex re(regex); } } else if (dumpfmt.getValue()) { cout<