"""Random word generator driven by a YAML description of switching nodes.

Each node in the data file is a list of branches.  A branch is a mapping
with a ``val`` template (``str.format`` syntax, where ``{Node}`` refers to
another node and ``{Node:1,0,2}`` overrides that node's branch
frequencies), an optional ``freq`` weight (default 1) and any number of
extra channels such as ``ipa``.  The reserved top-level keys ``replace``,
``replaceIPA``, ``replacement`` and ``channels`` hold post-processing rules
and channel titles instead of nodes.
"""
import sys
import random
import time
import re
import decimal
import optparse
import copy
import collections
from collections import deque
# Star import kept for backward compatibility; only Formatter is used here.
from string import *

try:
    import yaml
except ImportError:  # only main() needs YAML; the generators work without
    yaml = None
try:
    import gmpy2
except ImportError:  # optional: a pure-Python base-62 codec is used instead
    gmpy2 = None

sep = re.compile('[^0-9.]+')
expansionCount = 0

# Top-level data keys that are not switching nodes.
_RESERVED_KEYS = frozenset(["replace", "replaceIPA", "replacement", "channels"])
# Digit order used for base-62 path strings (matches readPath's alphabet).
_BASE62_DIGITS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"


def _toBase62(n):
    """Return the base-62 string for integer n (gmpy2 when available)."""
    if gmpy2 is not None:
        return gmpy2.mpz(n).digits(62)
    n = int(n)
    if n == 0:
        return "0"
    sign = "-" if n < 0 else ""
    n = abs(n)
    digits = []
    while n:
        n, r = divmod(n, 62)
        digits.append(_BASE62_DIGITS[r])
    return sign + "".join(reversed(digits))


def _fromBase62(token):
    """Return the integer for a base-62 token; ValueError on a bad digit."""
    if gmpy2 is not None:
        return int(gmpy2.mpz(token, 62))
    value = 0
    for c in token:
        value = value * 62 + _BASE62_DIGITS.index(c)
    return value


def _parseFreqList(spec):
    """Split a frequency override such as ``1,0,2.5`` into Decimals.

    Any run of characters other than digits and '.' separates values;
    leading or trailing separators are ignored.
    """
    return [decimal.Decimal(tok) for tok in sep.split(spec) if tok]


class Path:
    """A path to a word.

    A path records which branch was taken at a node (``_branch``) and one
    child Path per node reference in that branch's ``val`` template.
    """

    def __next__(self, depth=None):
        """Advance to a following word and return self.

        Nodes are visited from the last entry of asList() backwards; the
        first node that has a later branch with non-zero frequency is moved
        to that branch and its subtree is re-chosen with chooseFrom().
        Raises StopIteration when no node can be advanced.
        """
        if depth:
            self._depth = depth
        for n, d in reversed(self.asList()):
            branches = n._data[n._root]
            if len(branches) <= 1:
                continue
            # Find the next sibling branch that can actually be chosen.
            j = n._branch + 1
            while j < len(branches) and not branches[j].get("freq", decimal.Decimal("1")):
                j += 1
            if j >= len(branches):
                continue
            # Pad with zero-frequency entries so that chooseFrom reports
            # index j as the first element of the resulting path.
            L = [{"freq": 0} for _ in range(j)]
            L.append(branches[j])
            tmp = chooseFrom(n._data, L, self._depth - d + 1)
            print(formatWord(applyRE(n._data, tmp), {"ipa": True, }))
            n.__init__(n._data, n._root, tmp["path"])
            return self
        raise StopIteration

    def asList(self):
        """Return [(path, depth), ...] in breadth-first order.

        The root has depth 0 and each level of children one less.
        """
        q = [(self, 0)]
        i = 0
        while i < len(q):
            n, d = q[i]
            q.extend([(c, d - 1) for c in n._children])
            i += 1
        return q

    def iter(self):
        """Return self (legacy spelling, kept for existing callers)."""
        return self

    # Make Path usable directly in ``for`` loops as well.
    __iter__ = iter

    def getWord(self):
        """Return the word for this path as a dict with val, ipa and freq."""
        branches = self._data[self._root]
        Node = branches[self._branch]
        sumFreq = sum([decimal.Decimal(x.get("freq", decimal.Decimal('1'))) for x in branches])
        SNode = {"val": Node.get("val", ""),
                 "ipa": Node.get("ipa", ""),
                 "freq": decimal.Decimal(Node.get("freq", decimal.Decimal('1'))) / sumFreq}
        # Start from this branch's own probability; sub-words multiply in.
        rets = {"val": "", "ipa": "", "freq": SNode["freq"]}
        if not SNode["val"]:
            # No template to walk: the branch contributes only its IPA.
            rets["ipa"] = SNode["ipa"]
            return rets
        for i, s in enumerate(Formatter().parse(SNode["val"])):
            if s[1]:
                # Reference: splice in the sub-word of the matching child.
                tmp = self._children[i].getWord()
                rets["val"] = rets["val"] + s[0] + tmp["val"]
                rets["freq"] = rets["freq"] * tmp["freq"]
                if s[0]:
                    # Literal text precedes the reference: add own IPA too.
                    rets["ipa"] = rets["ipa"] + SNode["ipa"] + tmp["ipa"]
                else:
                    rets["ipa"] = rets["ipa"] + tmp["ipa"]
            else:
                # Literal text only.
                rets["val"] = rets["val"] + s[0]
                rets["ipa"] = rets["ipa"] + SNode["ipa"]
        return rets

    def _str(self):
        """Return the path in the ``+N[...]`` notation read by readPath()."""
        def recurse(p):
            ret = _toBase62(p._branch)
            for a in p._children:
                ret = ret + '[' + recurse(a) + ']'
            return ret
        return '+' + recurse(self)

    def _list(self):
        """Return the path as a nested list ``[branch, child, child, ...]``."""
        l = [self._branch]
        l.extend([c._list() for c in self._children])
        return l

    def __init__(self, Data, root, path):
        """Build a Path under node ``root`` of ``Data``.

        ``path`` may be a path string (see readPath), a nested list, or
        another Path with the same root (whose branch and children are
        shared); any other value selects branch 0 with no children.
        Raises ValueError when a Path with a different root is given.
        """
        self._data = Data
        self._root = root
        self._depth = -16
        if isinstance(path, str):
            path = readPath(path)
        if isinstance(path, list):
            self._branch = path[0]
            self._children = []
            template = Data[root][self._branch].get("val", "")
            # One child per reference in the template; because only the
            # final parsed segment can lack a reference, the segment index
            # equals the reference index.
            for i, tnode in enumerate(Formatter().parse(template)):
                if tnode[1]:
                    self._children.append(Path(Data, tnode[1], path[i + 1]))
        elif isinstance(path, Path):
            if self._root != path._root:
                raise ValueError
            self._branch = path._branch
            self._children = path._children
        else:
            self._branch = 0
            self._children = []


class SwitchingGraph(collections.UserDict):
    """Mapping of node name to SwitchingNode built from raw data."""

    class SwitchingNode(collections.UserList):
        """A list of Branch objects."""

        class Branch(collections.UserDict):
            """One branch; its ``val`` is parsed into literals and NodeRefs."""

            class NodeRef(collections.UserString):
                """A reference to a node plus optional frequency overrides."""

                def __repr__(self):
                    if self.flist:
                        return "n'" + self.data + ':' + ','.join(self.flist) + "'"
                    return "n'" + self.data + "'"

                def __init__(self, s, l):
                    self.data = s
                    self.flist = l

            def extract(self, val):
                """Parse a template into a list of str literals and NodeRefs."""
                ret = []
                for s in Formatter().parse(val):
                    if s[0]:
                        ret.append(s[0])
                    if s[1]:
                        flist = []
                        if s[2]:
                            flist = [str(d) for d in _parseFreqList(s[2])]
                        ret.append(self.NodeRef(s[1], flist))
                return ret

            def __init__(self, branch):
                self.data = branch.copy()
                if isinstance(branch.get("val", ""), str):
                    self.data["val"] = self.extract(branch.get("val", ""))
                if "freq" not in branch:
                    self["freq"] = decimal.Decimal(1)

            def _dump(self):
                """Return a textual dump of this branch's fields."""
                fields = [str(C) + ":" + repr(self.data[C]) for C in self.data]
                return "{\t" + ",\n\t".join(fields) + "\t}"

        def __init__(self, branches):
            self.data = [self.Branch(B) for B in branches]

        def _dump(self):
            """Return a list with the dump of every branch."""
            return [B._dump() for B in self.data]

    def __init__(self, data):
        self.data = dict()
        for N in data:
            if N in _RESERVED_KEYS:
                continue
            self.data[N] = self.SwitchingNode(data[N])
        self.regexes = RegexApplicator(data)

    def __getitem__(self, key):
        """Return a node; a NodeRef with overrides yields a re-weighted copy."""
        if isinstance(key, self.SwitchingNode.Branch.NodeRef) and key.flist:
            node = self.data[key].copy()
            for i in range(min(len(key.flist), len(node))):
                node[i]['freq'] = decimal.Decimal(key.flist[i])
            return node
        return self.data[key]

    def _dump(self):
        """Return a textual dump of every node."""
        return "\n".join([str(N) + ":\n\t" + '\n\t'.join(self.data[N]._dump())
                          for N in self.data])

    def toYAML(self):
        pass

    def addNode(self, name, branches):
        """Add (or replace) node ``name`` built from raw ``branches``."""
        self.data[name] = self.SwitchingNode(branches)


class RegexApplicator:
    """This class is NYI"""

    def __init__(self, data):
        # Keep only the reserved replacement/channel sections of the data.
        self.data = {N: data[N] for N in _RESERVED_KEYS if N in data}


def chooseFrom(Data, list, depth=-16, maxDepth=16):
    """Select a random value from the list, recursing on references.

    Data     -- the full data mapping (node name -> list of branches).
    list     -- the branches to choose from; each entry is normalised in
                place (val, freq and every declared channel get defaults).
    depth    -- negative recursion budget; recursion stops at 0.
    maxDepth -- expansion stops once the global expansionCount reaches
                maxDepth**2.

    Returns a dict with "val", "freq" (probability of this word), "path"
    (nested list of chosen indices) and every other channel of the chosen
    branches.  Frequency-overridden copies of referenced nodes are stored
    back into Data under "Node,f1,f2,...".  An invalid reference raises
    KeyError.
    """
    global expansionCount
    expansionCount += 1
    choices = list  # the parameter name shadows the builtin; alias it
    channels = Data["channels"] if "channels" in Data else ()
    for x in choices:
        x["val"] = x.get("val", "")
        x["freq"] = decimal.Decimal(x.get("freq", decimal.Decimal('1')))
        for ch in channels:
            x[ch] = x.get(ch, "")
    listSum = sum([x["freq"] for x in choices])
    a = decimal.Decimal(random.uniform(0, float(listSum)))
    # Walk the cumulative weights.  Zero-weight entries are never eligible,
    # and if rounding leaves a remainder the last eligible entry is used.
    stop = 0
    for i, x in enumerate(choices):
        if not x["freq"]:
            continue
        stop = i
        a -= x["freq"]
        if a <= 0:
            break
    chosen = choices[stop]
    other_channels = chosen.copy()
    for key in ("val", "freq", "path"):
        other_channels.pop(key, "")

    limit = None
    if expansionCount >= maxDepth**2:
        limit = "wordgen.py: expansion limit reached"
    elif depth >= 0:
        limit = "wordgen.py: recursion limit reached"
    if limit is not None:
        # Give up expanding: return the raw template of the chosen branch.
        print(limit, file=sys.stderr)
        rets = {"val": chosen["val"], "path": [stop], "freq": chosen["freq"] / listSum}
        for ch in other_channels:
            rets[ch] = chosen.get(ch, "")
        return rets

    rets = {"val": "", "freq": chosen["freq"] / listSum, "path": [stop]}
    if not chosen["val"]:
        # If val is empty, insert channels and bail.
        for ch in other_channels:
            rets[ch] = chosen.get(ch, "")
        return rets
    for s in Formatter().parse(chosen["val"]):
        if s[1]:
            # Recurse on the reference and splice the result in.  The node
            # is copied because the recursive call normalises it in place.
            node = copy.deepcopy(Data[s[1]])
            if s[2]:
                nstr = s[1]
                for i, d in enumerate(_parseFreqList(s[2])[:len(node)]):
                    node[i]['freq'] = d
                    nstr += ',' + str(d)
                Data[nstr] = node
            tmp = chooseFrom(Data, node, depth + 1, maxDepth)
            rets["val"] = rets["val"] + s[0] + tmp["val"]
            rets["freq"] = rets["freq"] * tmp["freq"]
            rets["path"].append(tmp["path"])
            if s[0]:
                # Literal text precedes the reference: add own channels.
                for ch in other_channels:
                    rets[ch] = rets.get(ch, "") + chosen.get(ch, "") + tmp.get(ch, "")
            else:
                for ch in other_channels:
                    rets[ch] = rets.get(ch, "") + tmp.get(ch, "")
        else:
            # No reference, only literal text.
            rets["val"] = rets["val"] + s[0]
            for ch in other_channels:
                rets[ch] = rets.get(ch, "") + chosen.get(ch, "")
    return rets


def filterRE(RE):
    """Processes regex from file for use. Currently no-op."""
    return RE


def _runStateMachine(stage, text):
    """Run one state-machine stage over text and return the output string.

    ``stage`` maps state names to tables; the start state is "S".  A table
    maps an input character (or "default") to ``[output]`` or
    ``[output, nextState]``; "end" is appended when input ends in that
    state.  Bit 1 of ``stage["reversed"]`` feeds the input reversed and
    bit 2 reverses the output.
    """
    flags = stage["reversed"] if "reversed" in stage else 0
    if flags & 1:
        text = text[::-1]
    state = "S"
    out = ""
    for c in text:
        table = stage[state]
        if c in table:
            out += table[c][0]
            if len(table[c]) > 1:
                state = table[c][1]
        else:
            if "default" in table:
                out += table["default"][0]
                if len(table["default"]) > 1:
                    state = table["default"][1]
            else:
                out += c
            # NOTE(review): the original indentation was lost; this
            # "return" transition is reconstructed as belonging to the
            # "no explicit rule for c" case -- confirm against the
            # upstream file.
            if "return" in stage[state]:
                state = stage[state]["return"]
    if "end" in stage[state]:
        out += stage[state]["end"]
    if flags & 2:
        out = out[::-1]
    return out


def applyRE(Data, word, keepHistory=False, KHSep="→", endToken='\025'):
    """Applies regular expressions in Data to word.

    Data["replace"] maps channel name -> list of stages.  A stage is either
    a list of rules ({"m": pattern, "r": replacement}) applied in order, or
    a state-machine mapping (see _runStateMachine).  Without "replace", the
    legacy keys "replacement" (val) and "replaceIPA" (ipa) are used.
    Compiled patterns are cached in each rule under "c".

    word is modified in place and returned.  With keepHistory, every
    intermediate form is kept, joined by KHSep.  endToken is unused.
    """
    def doStagedMatchReplace(regexes, text):
        ret = [text]
        for stage in regexes:
            if "S" in stage:
                ret.append(_runStateMachine(stage, ret[-1]))
            else:
                cline = ret[-1]
                for rule in stage:
                    if "c" not in rule:
                        rule["c"] = re.compile(filterRE(rule["m"]))
                    cline = rule["c"].sub(rule["r"], cline)
                ret.append(cline)
        return ret

    ret = {}
    if "replace" in Data:
        for channel in Data["replace"]:
            if channel in word:
                ret[channel] = [word[channel]] + doStagedMatchReplace(
                    Data["replace"][channel], word[channel])
    else:
        # compatibility with the older per-channel keys
        for key, channel in (("replacement", "val"), ("replaceIPA", "ipa")):
            if key in Data and channel in word:
                ret[channel] = [word[channel]] + doStagedMatchReplace(
                    Data[key], word[channel])
    for channel in ret:
        if keepHistory:
            word[channel] = KHSep.join(ret[channel])
        else:
            word[channel] = ret[channel][-1]
    return word


# Options used by listAll() when the caller supplies none.
_LIST_ALL_DEFAULTS = {
    "ipa": True,
    "HTML": False,
    "path": False,
    "depth": -16,
    "keepHistory": False,
    "keepHistorySep": "→",
    "ignoreZeros": True,
    "freq": True,
}


def listAll(Data, node, opts=None):
    '''Traverse all descendants of node (base).

    Yields one formatted line per word reachable from node.  opts is a
    mapping as built by main(); missing keys fall back to the defaults.
    '''
    merged = dict(_LIST_ALL_DEFAULTS)
    if opts:
        merged.update(opts)
    tree = listAllR(Data, node, merged["depth"], merged["ignoreZeros"])
    for word in DFSPrint(tree):
        yield formatWord(
            applyRE(Data, {
                "val": word[0],
                "ipa": word[1],
                # DFSPrint doesn't work with paths
                "path": [0],
                "freq": word[2]}),
            merged)
        time.sleep(0.0001)


def listAllR(Data, node, depth, ignoreZeros, path=None, flist=None):
    '''Implementation of listAll. Do not call.

    Returns a tree of dicts tagged by "t":
      'N' node        {"node", "sum", "Ncontents": [A, ...]}
      'A' alternative {"freq", "Acontents": [L | N | V | T, ...]}
      'L' literal     {"val", "ipa"}
      'V' a node already visited during this traversal {"node"}
      'T' truncated by the depth limit {"node", "raw"}
    path collects the visited node names; flist optionally overrides the
    branch frequencies of node.  Frequency-overridden copies of referenced
    nodes are stored in Data under "Node,f1,f2,...".
    '''
    if path is None:
        path = []  # fresh per traversal; a shared default would persist
    if node in path:
        return {"t": 'V', "node": node}
    if depth >= 0:
        # Recursion depth reached
        print("wordgen.py: recursion depth reached", file=sys.stderr)
        return {"t": 'T', "node": node, "raw": Data[node]}

    path.append(node)
    branches = Data[node]
    if not flist:
        flist = [x.get("freq", decimal.Decimal('1')) for x in branches]
    listSum = sum(flist)
    if ignoreZeros:
        candidates = []
        for i, branch in enumerate(branches):
            if i < len(flist):
                # Copy the branch so that Data is not altered.
                N = dict(branch)
                N["freq"] = flist[i]
                if flist[i]:
                    candidates.append(N)
            elif branch.get("freq", decimal.Decimal('1')):
                candidates.append(branch)
    else:
        candidates = branches

    matches = []
    for child in candidates:
        contents = []
        matches.append({
            "t": 'A',
            "freq": child.get("freq", decimal.Decimal('1')) / listSum,
            "Acontents": contents})
        if not child.get("val", ""):
            # If no val, insert IPA anyway.
            contents.append({"t": 'L', "val": '', "ipa": child.get("ipa", "")})
            continue
        for s in Formatter().parse(child["val"]):
            if not s[1]:
                # No reference, only literal text.
                contents.append({"t": 'L', "val": s[0], "ipa": child.get("ipa", "")})
                continue
            nstr = s[1]
            if s[2]:
                # Apply the overrides to a copy, never to the shared node.
                variant = copy.deepcopy(Data[s[1]])
                for i, d in enumerate(_parseFreqList(s[2])[:len(variant)]):
                    variant[i]['freq'] = d
                    nstr += ',' + str(d)
                if nstr not in Data:
                    Data[nstr] = variant
            # An invalid reference raises KeyError inside the recursion.
            tmp = listAllR(Data, nstr, depth + 1, ignoreZeros, path, None)
            if s[0]:
                # Literal text precedes the reference.
                contents.append({"t": 'L', "val": s[0], "ipa": child.get("ipa", "")})
            contents.append(tmp)
    return {"t": 'N', "node": node, "sum": listSum, "Ncontents": matches}


def DFSPrint(Node, freq=1):
    '''Generate list of words suitable for printing from tree structure.

    Node is a tree as produced by listAllR; the result is a list of
    (val, ipa, freq) tuples, where freq is the given freq multiplied by
    the probabilities of all alternatives taken.
    '''
    def f_A(Node, freq):
        # Cartesian product of the contents.  The alternative's own
        # probability (already folded into freq by f_N) is applied exactly
        # once, so every content item is expanded with a neutral factor.
        words = [("", "", freq)]
        for n in Node["Acontents"]:
            parts = DFSPrint(n, 1)
            words = [(i[0] + j[0], i[1] + j[1], i[2] * j[2])
                     for i in words for j in parts]
        return words

    def f_N(Node, freq):
        # N always contains As: weight each by its probability and recurse.
        ret = []
        for n in Node["Ncontents"]:
            ret.extend(DFSPrint(n, freq * n["freq"]))
        return ret

    def f_L(Node, freq):
        # Leaf
        return [(Node["val"], Node["ipa"], freq)]

    def f_ref(Node, freq):
        # Visited or truncated node: print it as a {reference}.
        return [("{" + Node["node"] + "}", "{" + Node["node"] + "}", freq)]

    switch = {
        'A': f_A,
        'N': f_N,
        'L': f_L,
        'V': f_ref,
        'T': f_ref,
    }
    return switch[Node['t']](Node, freq)


def formatWord(word, opts, formatStr=None):
    '''Return word formatted as one output line.

    With formatStr, returns formatStr.format(**word).  Otherwise the
    channels listed in opts["channels"] are emitted, tab-terminated, or as
    an HTML table row when opts["HTML"] is true; missing channels are
    filled with "" in word.  When opts has no "channels", the list is
    derived from the legacy boolean keys "ipa", "path" and "freq".
    '''
    if formatStr is not None:
        return formatStr.format(**word)
    channels = opts.get("channels")
    if channels is None:
        channels = ["val"] + [ch for ch in ("ipa", "path", "freq") if opts.get(ch)]
    for ch in channels:
        word[ch] = word.get(ch, "")
    word["val"] = word.get("val", "")
    if not opts.get("HTML", False):
        fstr = "".join("{" + ch + "}\t" for ch in channels)
    else:
        # NOTE(review): the markup literals were empty in the reviewed
        # copy (apparently stripped); restored as a plain table row --
        # confirm against the upstream file.
        fstr = "<tr>" + "".join("<td>{" + ch + "}</td>" for ch in channels) + "</tr>"
    return formatWord(word, opts, fstr)


def printPath(path):
    """Return nested-list path as a string, e.g. [1, [2]] -> '+1[2]'."""
    def recurse(path):
        ret = _toBase62(path[0])
        for a in path[1:]:
            ret = ret + '[' + recurse(a) + ']'
        return ret
    return '+' + recurse(path)


def readPath(pathStr):
    """Parse a path string as written by printPath into a nested list.

    Integers are in base 62; '+' and any unrecognised character are
    skipped.  A missing closing ']' is tolerated.  A ValueError raised
    while converting a token is re-raised with pathStr appended to its
    args.
    """
    # Simple token separator function - recognizes + [ ] and alphanumerics
    def tokensOf(pathStr):
        ret = ""
        inInt = False
        for c in pathStr:
            if c in "[]":
                inInt = False
                if ret:
                    yield ret
                ret = c
            elif c == "+":
                pass
            elif c in _BASE62_DIGITS:
                # Ints are in base 62; int tokens can be multiple characters
                if inInt:
                    ret = ret + c
                else:
                    inInt = True
                    if ret:
                        yield ret
                    ret = c
        # Emit the pending token; a generator's return value would be lost.
        if ret:
            yield ret

    def constructPath(tokens, i=0):
        ret = []
        while i < len(tokens):
            if tokens[i] == "[":
                tret, i = constructPath(tokens, i + 1)
                ret.append(tret)
            elif tokens[i] == "]":
                return ret, i
            else:
                ret.append(_fromBase62(tokens[i]))
            i += 1
        return ret, i

    try:
        return constructPath(list(tokensOf(pathStr)))[0]
    except ValueError as err:
        # In case of error, report the full path in addition to the subpath
        err.args = err.args + (pathStr, )
        raise


def followPath(Data, node, path):
    """Return the word (val, ipa, freq) that nested-list path selects.

    path[0] is the branch index within Data[node]; path[i+1] is the
    sub-path for the i-th reference of that branch's template.  An invalid
    reference raises KeyError.
    """
    branches = Data[node]
    chosen = branches[path[0]]
    sumFreq = sum([decimal.Decimal(x.get("freq", decimal.Decimal('1'))) for x in branches])
    SNode = {"val": chosen.get("val", ""),
             "ipa": chosen.get("ipa", ""),
             "freq": decimal.Decimal(chosen.get("freq", decimal.Decimal('1'))) / sumFreq}
    # Start from this branch's own probability; sub-words multiply in.
    rets = {"val": "", "ipa": "", "freq": SNode["freq"]}
    if not SNode["val"]:
        # No template to walk: the branch contributes only its IPA.
        rets["ipa"] = SNode["ipa"]
        return rets
    for i, s in enumerate(Formatter().parse(SNode["val"])):
        if s[1]:
            # Recurse on the reference and splice the result in.
            tmp = followPath(Data, s[1], path[i + 1])
            rets["val"] = rets["val"] + s[0] + tmp["val"]
            rets["freq"] = rets["freq"] * tmp["freq"]
            if s[0]:
                # Literal text precedes the reference: add own IPA too.
                rets["ipa"] = rets["ipa"] + SNode["ipa"] + tmp["ipa"]
            else:
                rets["ipa"] = rets["ipa"] + tmp["ipa"]
        else:
            # No reference, only literal text.
            rets["val"] = rets["val"] + s[0]
            rets["ipa"] = rets["ipa"] + SNode["ipa"]
    return rets


def toBNF(Data, StartDef=None):
    """Return a BNF-style listing of the nodes in Data (val only).

    Each node becomes ``<name> ::= alt | alt``; literal text is quoted and
    references become ``<Node>`` (frequency overrides are dropped).  When
    StartDef names a node it is listed first.  Reserved keys are skipped.
    """
    names = [N for N in Data if N not in _RESERVED_KEYS]
    if StartDef in names:
        names.remove(StartDef)
        names.insert(0, StartDef)
    lines = []
    for name in names:
        alternatives = []
        for branch in Data[name]:
            parts = []
            for s in Formatter().parse(str(branch.get("val", ""))):
                if s[0]:
                    parts.append('"' + s[0] + '"')
                if s[1]:
                    parts.append("<" + s[1] + ">")
            alternatives.append(" ".join(parts) or '""')
        lines.append("<" + str(name) + "> ::= " + " | ".join(alternatives))
    return "\n".join(lines)


def main():
    """Command-line entry point: ``datafile node [gen|list|diag]``."""
    global expansionCount
    if yaml is None:
        raise SystemExit("wordgen.py: PyYAML is required to load data files")

    # Decimals are written to and read from YAML as "d<number>" scalars.
    def dec_repr(dumper, data):
        return dumper.represent_scalar(u'!d', 'd' + str(data))
    yaml.Dumper.add_representer(decimal.Decimal, dec_repr)
    yaml.SafeDumper.add_representer(decimal.Decimal, dec_repr)
    yaml.Loader.add_implicit_resolver(u'!d', re.compile(r'd\d*\.?\d+'), ['d'])
    yaml.SafeLoader.add_implicit_resolver(u'!d', re.compile(r'd\d*\.?\d+'), ['d'])

    def dec_cons(loader, node):
        return decimal.Decimal(loader.construct_scalar(node)[1:])
    yaml.Loader.add_constructor(u'!d', dec_cons)
    yaml.SafeLoader.add_constructor(u'!d', dec_cons)

    parser = optparse.OptionParser(
        usage="usage: %prog [options] [command]\n"
              "       [command] may be either 'gen' [default], 'list', or 'diag'")
    genGroup = optparse.OptionGroup(parser, "Options for gen")
    listGroup = optparse.OptionGroup(parser, "Options for list")
    diagGroup = optparse.OptionGroup(parser, "Options for diag")
    debugGroup = optparse.OptionGroup(parser, "Debugging options")
    parser.add_option("-c", "--channel", dest="channels",
                      action="append", metavar="CHANNEL", default=[],
                      help="print CHANNEL (can be used multiple times)")
    parser.add_option("-p", "--ipa", dest="channels",
                      action="append_const", const="ipa",
                      help="print IPA transcriptions (-c ipa)")
    parser.add_option("-d", "--depth", dest="depth",
                      type="int", default=16,
                      help="maximum recursion depth [default: %default]")
    parser.add_option("-H", "--html", dest="HTMLmode",
                      action="store_true", default=False,
                      help="write output as HTML table")
    genGroup.add_option("-n", dest="num",
                        type="int", default=1, metavar="numWords",
                        help="number of words to generate")
    genGroup.add_option("-V", dest="noVal",
                        action="store_true", default=False,
                        help="Suppress implicit 'val' printing")
    genGroup.add_option("-q", "--quiet", dest="quiet",
                        action="store_true", default=False,
                        help="Disable printing of the header")
    genGroup.add_option("-F", "--fmt", dest="fstr",
                        type="string", metavar="FMT_STR", default=[],
                        help="Format string for printing words")
    parser.add_option_group(genGroup)
    listGroup.add_option("-0", "--listZeros", dest="ignoreZeros",
                         action="store_false", default=True,
                         help="include 0-frequency values in list")
    parser.add_option_group(listGroup)
    diagGroup.add_option("--regex", dest="dbgRE",
                         action="store_true", default=False,
                         help="Dump regular expressions after filtering.")
    diagGroup.add_option("--nodes", dest="dbgNodes",
                         action="store_true", default=False,
                         help="Dump switching nodes after filtering.")
    diagGroup.add_option("--retest", dest="dbgRETest",
                         action="store_true", default=False,
                         help="Apply regexes for CHANNELS to input.")
    diagGroup.add_option("--bnf", dest="dbgBNFExport",
                         action="store_true", default=False,
                         help="Export to BNF (val only).")
    parser.add_option_group(diagGroup)
    debugGroup.add_option("-P", "--path", dest="channels",
                          action="append_const", const="path",
                          help="print paths for generated words")
    debugGroup.add_option("-K", "--keepHistory", dest="keepHistory",
                          action="store_true", default=False,
                          help="save every step of regex application\n"
                               "May be hard to read.")
    debugGroup.add_option("--KHSep", dest="KHSep",
                          type="string", default="→", metavar="SEP",
                          help="what to insert between regex applications")
    debugGroup.add_option("-r", "--seed", dest="seed",
                          action="store", default=None,
                          help="random seed")
    debugGroup.add_option("-f", "--showFreqs", dest="showFreqs",
                          action="store_true", default=False,
                          help="show calculated frequencies (-c freq)")
    parser.add_option_group(debugGroup)
    (options, args) = parser.parse_args()
    if len(args) < 2:
        parser.error("Not enough arguments")
    if len(args) < 3:
        args.append("gen")

    options.IPAmode = "ipa" in options.channels
    options.path = "path" in options.channels
    if "freq" in options.channels:
        options.showFreqs = True
    elif options.showFreqs:
        # -f is documented as shorthand for "-c freq".
        options.channels = options.channels + ["freq"]
    if not options.noVal:
        options.channels = ['val'] + options.channels
    opts = {
        "HTML": options.HTMLmode,
        "path": options.path,
        "depth": -1 * options.depth,
        "keepHistory": options.keepHistory,
        "keepHistorySep": options.KHSep,
        "ignoreZeros": options.ignoreZeros,
        "freq": options.showFreqs,
        "channels": options.channels,
    }
    random.seed(options.seed)
    with open(args[0], 'r', encoding="utf8") as dataFile:
        Data = yaml.safe_load(dataFile)

    if args[2] == "gen":
        if "channels" not in Data:
            Data["channels"] = {"val": "Words", "ipa": "IPA", "path": "Path"}
        if options.HTMLmode:
            # NOTE(review): table markup restored; the literals were empty
            # in the reviewed copy -- confirm against the upstream file.
            Header = "<table><tr>"
            for ch in options.channels:
                Header += "<th>" + Data["channels"].get(ch, "") + "</th>"
            Header += "</tr>"
            print(Header)
        elif not options.quiet:
            Header = ""
            for ch in options.channels:
                Header += Data["channels"].get(ch, "") + '\t'
            print(Header)
            print('-' * 40)
        for i in range(options.num):
            word = applyRE(Data,
                           chooseFrom(Data, Data[args[1]], -1 * options.depth),
                           options.keepHistory, options.KHSep)
            # The expansion budget applies per generated word.
            expansionCount = 0
            print(formatWord(word, opts, options.fstr or None))
        if options.HTMLmode:
            print("</table>")
    elif args[2] == "list":
        for word in listAll(Data, args[1], opts):
            print(word)
    elif args[2] == "diag":
        if options.dbgRE and "replace" in Data:
            for channel in Data["replace"]:
                print(channel + ':')
                for stage in Data["replace"][channel]:
                    print(' [')
                    if isinstance(stage, dict):
                        # State-machine stage: it has no m/r rules to list.
                        print('  ' + repr(stage))
                    else:
                        for rule in stage:
                            print('  {' + "m: {m}, r: {r}".format(
                                m=repr(filterRE(rule['m'])), r=repr(rule['r'])) + '}')
                    print(' ]')
        if options.dbgNodes:
            G = SwitchingGraph(Data)
            G.addNode(":arg", [{"val": args[1]}])
            print(repr(G))
            print(repr(G[":arg"]))
        if options.dbgRETest:
            # TODO: --retest is not implemented yet.
            pass
        if options.dbgBNFExport:
            print('-' * 40)
            print(toBNF(Data, args[1]))
            print('-' * 40)


if __name__ == "__main__":
    main()