import sys
import random
import time
import re
import optparse
import copy
import collections
import html
import operator
from unicodedata import normalize

# import pdb

import gmpy2
import yaml
from tr import tr

sep = re.compile('[^0-9.]*[^0-9.+*=]')
sep2 = re.compile('[^0-9.:]*[^0-9.:]')
aChar = re.compile('(.)')
expansionCount = 0
one = float('1')
channels = {}
retable = dict()


def refParse(refstr):
	"""Basically copy Formatter().parse() but treat ! differently:
	Only one of : or ! is allowed to follow the refname, and |
	is used to separate arguments. Also, the subtle points of
	{{ and }} interpretation are surely different.
	"""
	lit = ""
	name = None
	flist = None
	ilist = None
	args = []
	state = "lit"
	ostate = "lit"
	for c in refstr:
		# print((c,state))
		if state == "lit":
			if c == "{":
				ostate = state
				state = "{"
			elif c == "}":
				ostate = state
				state = "}"
			elif c == '\\':
				ostate = state
				state = "esc"
			else:
				lit += c
		elif state == "{":
			if c == "{":
				if ostate == "lit":
					lit += c
				elif ostate == "name":
					name += c
				elif ostate == "flist":
					flist += c
				elif ostate == "ilist":
					ilist += c
				elif ostate == "args":
					args[-1] += c
				state = ostate
			elif c == "}":
				if ostate == "lit":
					state = "}"
					ostate = "name"
					name = ""
				else:
					raise ValueError("{ encountered in reference")
			elif c == ":":
				name = ""
				state = "flist"
				flist = ""
			elif c == "!":
				name = ""
				state = "ilist"
				ilist = ""
			elif c == "|":
				name = ""
				state = "args"
				args = [""]
			elif c == "\\":
				name = ""
				ostate = state
				state = "esc"
			else:
				state = "name"
				name = c
		elif state == "}":
			if c == "}":
				if ostate == "lit":
					lit += c
				elif ostate == "name":
					name += c
				elif ostate == "flist":
					flist += c
				elif ostate == "ilist":
					ilist += c
				elif ostate == "args":
					args[-1] += c
				state = ostate
			else:
				if ostate == "lit":
					raise ValueError("Single '}' encountered", refstr)
				elif c == "{":
					yield (lit, name, flist, ilist, args)
					state = "{"
					lit = ""
					name = None
					flist = None
					ilist = None
					args = None
				else:
					yield (lit, name, flist, ilist, args)
					state = "lit"
					lit = c
					name = None
					flist = None
					ilist = None
					args = None
		elif state == "name":
			if c == ":":
				state = "flist"
				flist = ""
			elif c == "!":
				state = "ilist"
				ilist = ""
			elif c == "|":
				state = "args"
				args = [""]
			elif c == "\\":
				ostate = state
				state = "esc"
			elif c == "}":
				ostate = state
				state = "}"
			elif c == "{":
				ostate = state
				state = "{"
			else:
				name += c
		elif state == "flist":
			if c == "}":
				ostate = state
				state = "}"
			elif c == "{":
				ostate = state
				state = "{"
			else:
				flist += c
		elif state == "ilist":
			if c == "}":
				ostate = state
				state = "}"
			elif c == "{":
				ostate = state
				state = "{"
			else:
				ilist += c
		elif state == "args":
			if c == ":":
				state = "flist"
				flist = ""
			elif c == "!":
				state = "ilist"
				ilist = ""
			elif c == "|":
				args.append("")
			elif c == "}":
				ostate = state
				state = "}"
			elif c == "{":
				ostate = state
				state = "{"
			elif c == "\\":
				ostate = state
				state = "esc"
			else:
				args[-1] += c
		elif state == "esc":
			if c in "\\\"':!|{}<>":
				if ostate == "lit":
					lit += c
				elif ostate == "name":
					name += c
				elif ostate == "flist":
					flist += c
				elif ostate == "ilist":
					ilist += c
				elif ostate == "args":
					args[-1] += c
				# if ostate == "args":
					# args[-1] = c
				# elif ostate == "name":
					# name += c
				state = ostate
			else:
				raise ValueError("Illegal escape", "\\" + c, refstr)
	if state == "lit":
		if not args:
			args = None
		yield (lit, name, flist, ilist, args)
	elif state == "}":
		if ostate != "lit":
			if not args:
				args = None
			yield (lit, name, flist, ilist, args)
		else:
			raise ValueError("stray '}' outside reference")
	elif state == "esc":
		raise ValueError(("Unterminated escape: state = "
			+ repr(state) + " ostate = " + repr(ostate)),
			(" values = " + repr((lit, name, flist, ilist, args))),
			refstr)
	else:
		raise ValueError(("Unterminated reference: state = "
			+ repr(state) + " ostate = " + repr(ostate)),
			(" values = " + repr((lit, name, flist, ilist, args))),
			refstr)


def argExpand_stack(arg, args, depth, maxDepth):
	"""Prototype stack-based expander, for recursive argrefs.
		Not yet used.
	"""
	argref_cexpr = re.compile(r"([^<>()]*)\(([^<>]*)\)")
	tok_literal = 1
	tok_push_func = 2
	tok_call_func = 3
	tok_arg = 4
	# tok_ellipsis = 5
	token = collections.namedtuple('token', ['type', 'text'])

	def get_tokens(arg):
		type = tok_literal
		text = ""
		for ch in arg:
			if ch == '#' and not text:
				type = tok_arg
			elif ch == '|':
				yield token(type, text)
				text = ""
				type = tok_literal
			elif ch == ')':
				yield token(type, text)
				yield token(tok_call_func, ')')
				text = ""
				type = tok_literal
			elif ch == '(':
				if type == tok_literal:
					yield token(tok_push_func, text)
					text = ""
					type = tok_literal
				elif type == tok_arg:
					raise ValueError('#' + text + " cannot be used as function name")
			else:
				text = text + ch
		return token(type, text)

	# Set up the "registers" for the hybrid stack machine
	registers = {
		# Special registers:
		# for escaping
		'p': '|',
		'lt': '<',
		'gt': '>',
		'b': "\\",
		'lb': '\\{',
		'rb': '\\}',
		# Expansion depth of the node being expanded
		'd': depth,
		# Maximum depth
		'D': maxDepth,
		# Number of arguments passed to this template
		'c': len(args['a']),
		# Number of arguments declared
		'C': args['c'],
		# Full list of declared arguments
		'a': args[:args['c']],
		# 'Extra' arguments
		'...': args['a'][args['c']:],
		# Full list of arguments
		'A': args['a'],
	}
	for i in range(len(args['a'])):
		registers[str(i)] = args[i]

	def valOf(a, b):
		pass

	# Declare the instruction set
	def mather(op, init, regs, *vals):
		acc = init
		for v in vals:
			if isinstance(v, list):
				for i in v:
					acc = op(acc, float(i))
			else:
				acc = op(acc, float(v))
		return str(int(acc)) if acc.is_integer() else str(acc)

	def flatten(ln):
		for el in ln:
			if (isinstance(el, collections.Iterable)
				and not isinstance(el, (str, bytes))):
				yield from flatten(el)
			else:
				yield el

	def pn(regs, *params):
		ops = {
			"+": lambda a, b: float(a) + float(b),
			"-": lambda a, b: float(a) - float(b),
			"*": lambda a, b: float(a) * float(b),
			"/": lambda a, b: float(a) / float(b),
			".": lambda a, b: a + b,
			"^": lambda a, b: float(a) ** float(b),
		}
		opstack = []
		valstack = []
		pending = False

		exparams = flatten(params)
		for p in exparams:
			if p in ops:
				opstack.append(p)
				pending = False
			else:
				if pending:
					while len(valstack):
						v1 = valstack.pop()
						op = opstack.pop()
						try:
							p = str(ops[op](v1, p))
						except ZeroDivisionError as err:
							err.args += v1, p
				valstack.append(p)
				pending = True
		return valstack.pop()

	def comparer(op, regs, a, b, t, f):
		return t if op(a, b) else f

	def repeat(regs, *params):
		acc = ""
		params = list(params)
		# Handle odd lists
		params.append("1")
		for p in zip(params[::2], params[1::2]):
			acc += p[0] * int(p[1])
		return acc

	def oneof(args, *params):
		exparams = flatten([valOf(args, p) for p in params])
		return random.choice(exparams)

	def truthy(regs, val):
		if not val:
			return False
		if not float(val):
			return False
		return True

	funs = {
		# Length of a list
		"len": lambda r, v: str(len(v) if isinstance(v, list) else 1),
		# Math functions
		"+": lambda r, *v: mather(operator.add, 0, r, *v),
		"-": lambda r, *v: mather(operator.sub, 0, r, *v),
		"*": lambda r, *v: mather(operator.mul, 1, r, *v),
		"/": lambda r, *v: mather(operator.truediv, 1, r, *v),
		"^": lambda r, *v: mather(operator.pow, 1, r, *v),
		# Polish notation evaluator
		"math": pn,
		"calc": pn,
		# Branching functions
		"if": lambda r, v, t, f: t if truthy(r, v) else f,
		"gt": lambda *v: comparer(operator.gt, *v),
		"gte": lambda *v: comparer(operator.ge, *v),
		"lt": lambda *v: comparer(operator.lt, *v),
		"lte": lambda *v: comparer(operator.le, *v),
		"=": lambda *v: comparer(operator.eq, *v),
		"eq": lambda *v: comparer(operator.eq, *v),
		"!=": lambda *v: comparer(operator.ne, *v),
		"ne": lambda *v: comparer(operator.ne, *v),
		# Logic
		"not": lambda r, v: not truthy(r, v),
		# Indexing functions
		"?": lambda r, c, *v: v[c],
		"i": lambda r, c, *v: v[c],
		"which": lambda r, c, *v: v.index(c),
		# Why does this exist?
		"num": lambda r, v: str(float(v)),
		# String concatenation
		".": lambda r, *v: "".join(v),
		".*": repeat,
		"oneof": oneof,
	}

	if argref_cexpr.fullmatch(arg):
		datastack = []
		funcstack = []
		argstack = []
		for tok in get_tokens(arg):
			if tok.type == tok_push_func:
				funcstack.append(tok.text)
				argstack.append(0)
			elif tok.type == tok_literal:
				datastack.append(tok.text)
				argstack[-1] += 1
			elif tok.type == tok_arg:
				datastack.append(registers[tok.text])
				argstack[-1] += 1
			elif tok.type == tok_call_func:
				count_args = argstack.pop()
				computed_args = datastack[-count_args:]
				del datastack[-count_args:]
				fun = funcstack.pop()
				datastack.append(funs[fun](registers, *computed_args))
				argstack[-1] += 1
		if len(argstack) > 0:
			raise ValueError("Unterminated function")
		if len(datastack) > 1:
			raise ValueError("| encountered outside function")
		assert(len(datastack) == 1)
		ret = datastack.pop()
		if isinstance(ret, list):
			ret = '|'.join(ret)
		return ret
	elif arg == "...":
		return '|'.join(args['a'][args['c']:])
	else:
		return registers[arg]


def argsExpand(argstr, args, depth, maxDepth):
	argref = re.compile(r"(<<|>>|[^<>]*)(<([^<>]*)>)?")
	argfun = re.compile(r"([^<>()]*)\(([^<>()]*)\)")
	outstr = ""
	# print(argstr, args)
	# print(argstr, argref.findall(argstr))

	global expansionCount
	def valOf(args, val):
		registers = {
			# Special registers:
			# for escaping
			'p': '|',
			'lt': '<',
			'gt': '>',
			'b': "\\",
			'lb': '\\{',
			'rb': '\\}',
			# Expansion depth of the node being expanded
			'd': depth,
			# Maximum depth
			'D': maxDepth,
			'e': expansionCount,
			'E': maxDepth ** 2,
			# Number of arguments passed to this template
			'c': len(args['a']),
			# Number of arguments declared
			'C': args['c'],
			# Full list of declared arguments
			'a': args['a'][:args['c']],
			# 'Extra' arguments
			'...': args['a'][args['c']:],
			# Full list of arguments
			'A': args['a'],
		}

		def impl(args, val):
			if len(val) == 0:
				return val
			if val[0] == "#":
				if val[1:] in registers:
					return str(registers[val[1:]])
				else:
					return args['a'][int(val[1:])]
			elif val == "...":
				return args['a'][args['c']:]
			elif argfun.fullmatch(val):
				fun, argname = argfun.fullmatch(val).groups()
				return funs[fun](args, *argname.split('|'))
			else:
				return val
		v = impl(args, val)
		# print("Computed: ", repr(val), " = ", repr(v), file=sys.stderr)
		return v

	def mather(op, init, a, *vals):
		if init is None:
			acc = float(valOf(args, vals[0]))
			for v in vals[1:]:
				if v == "...":
					for i in range(a['c'], len(a['a']), 1):
						acc = op(acc, float(valOf(args, a['a'][i])))
				else:
					acc = op(acc, float(valOf(args, v)))
				# elif v[0] == "#":
				# 	acc = op(acc, float(a['a'][int(v[1:])]))
				# else:
				# 	acc = op(acc, float(v))
			return str(int(acc)) if acc.is_integer() else str(acc)
				
		else:
			acc = init
			for v in vals:
				if v == "...":
					for i in range(a['c'], len(a['a']), 1):
						acc = op(acc, float(a['a'][i]))
				else:
					acc = op(acc, float(valOf(args, v)))
				# elif v[0] == "#":
				# 	acc = op(acc, float(a['a'][int(v[1:])]))
				# else:
				# 	acc = op(acc, float(v))
			return str(int(acc)) if acc.is_integer() else str(acc)

	def select(args, choose, *vals):
		return valOf(args, vals[valOf(args, choose)])
		# if choose[0] != "#":
		# 	raise ValueError("? expects first argument to be an argument")
		# return vals[int(args['a'][int(choose[1:])])]

	def which(args, choose, *vals):
		return [valOf(args, _) for _ in vals].index(valOf(args, choose))
		# if choose[0] != "#":
		# 	raise ValueError("which expects first argument to be an argument")
		# return vals.index(args['a'][int(choose[1:])])

	def num(args, val):
		return str(float(valOf(args, val)))

	def greater(args, var, pivot, t, f):
		if valOf(args, var) > valOf(args, pivot):
			return valOf(args, t)
		else:
			return valOf(args, f)

	def lesser(args, var, pivot, t, f):
		if valOf(args, var) < valOf(args, pivot):
			return valOf(args, t)
		else:
			return valOf(args, f)

	def equal(args, var, pivot, t, f):
		if valOf(args, var) == valOf(args, pivot):
			return valOf(args, t)
		else:
			return valOf(args, f)

	def concat(args, *params):
		return "".join([valOf(args, p) for p in params])

	def repeat(args, *params):
		acc = ""
		# Handle odd lists
		params = list(params)
		params.append("1")
		for p in zip(params[::2], params[1::2]):
			acc += valOf(args, p[0]) * int(valOf(args, p[1]))
		return acc

	def flatten(ln):
		for el in ln:
			if (isinstance(el, collections.abc.Iterable)
				and not isinstance(el, (str, bytes))):
				yield from flatten(el)
			else:
				yield el

	def pn(args, *params):
		ops = {
			"+": lambda a, b: float(a) + float(b),
			"-": lambda a, b: float(a) - float(b),
			"*": lambda a, b: float(a) * float(b),
			"/": lambda a, b: float(a) / float(b),
			".": lambda a, b: a + b,
			"^": lambda a, b: float(a) ** float(b),
		}
		opstack = []
		valstack = []
		pending = False
		exparams = flatten([valOf(args, p) for p in params])
		for _ in exparams:
			p = valOf(args, _)
			if p in ops:
				opstack.append(p)
				pending = False
			else:
				if pending:
					while len(valstack):
						v1 = valstack.pop()
						op = opstack.pop()
						try:
							p = str(ops[op](v1, p))
						except ZeroDivisionError as err:
							err.args += v1, p
							raise
				valstack.append(p)
				pending = True
		return valstack.pop()

	def rpn(args, *params):
		ops = {
			"+": lambda a, b: float(a) + float(b),
			"-": lambda a, b: float(a) - float(b),
			"*": lambda a, b: float(a) * float(b),
			"/": lambda a, b: float(a) / float(b),
			".": lambda a, b: a + b,
			"^": lambda a, b: float(a) ** float(b),
		}
		valstack = []
		exparams = flatten([valOf(args, p) for p in params])
		for _ in exparams:
			p = valOf(args, _)
			if p in ops:
				v2 = valstack.pop()
				v1 = valstack.pop()
				try:
					valstack.append(str(ops[p](v1, v2)))
				except ZeroDivisionError as err:
					err.args += v1, v2, p
					raise
			else:
				valstack.append(p)
		return valstack.pop()

	def mapper(args, text, set1, set2):
		return tr(valOf(args, set1), valOf(args, set2), valOf(args, text))

	def replacer(args, text, *params):
		params = list(params)
		text = valOf(args, text)
		if len(params) % 2:
			raise ValueError("'replace' requires its arguments to be in pairs.")
		for p in zip(params[::2], params[1::2]):
			m = valOf(args, p[0])
			if m not in retable:
				retable[m] = re.compile(m)
			text = re.sub(retable[m], valOf(args, p[1]), text)
		return text

	def oneof(args, *params):
		exparams = flatten([valOf(args, p) for p in params])
		return random.choice(exparams)

	def truthy(args, arg):
		val = valOf(args, arg)
		if not val:
			return False
		if not float(val):
			return False
		return True

	funs = {
		# "len": lambda a, v: "1" if not v == "..." else str(len(a['a']) - a['c']),
		"len": lambda a, v: str(len(a['a']) - a['c']) if v == "..." else "1",
		"+": lambda a, *v: mather(operator.add, 0, a, *v),
		"-": lambda a, *v: mather(operator.sub, None, a, *v),
		"*": lambda a, *v: mather(operator.mul, 1, a, *v),
		"/": lambda a, *v: mather(operator.truediv, None, a, *v),
		"^": lambda a, *v: mather(operator.pow, 1, a, *v),
		"?": select,
		"which": which,
		"num": num,
		"if": lambda r, v, t, f: t if truthy(r, v) else f,
		"gt": greater,
		"lt": lesser,
		"=": equal,
		"eq": equal,
		".": concat,
		".*": repeat,
		"math": pn,
		"calc": pn,
		"pn": pn,
		"rpn": rpn,
		"map": mapper,
		"replace": replacer,
		"oneof": oneof,
		None: lambda a, v: v
	}
	for m in argref.finditer(argstr):
		# print(m.groups())
		lit, _, arg = m.groups("")
		outstr += lit
		if argfun.fullmatch(arg):
			fun, argname = argfun.fullmatch(arg).groups()
			outstr += funs[fun](args, *argname.split('|'))
		elif arg == "...":
			outstr += '|'.join(args['a'][args['c']:])
		elif arg:
			outstr += valOf(args, '#' + arg)
	return outstr


def processRef(Data, s):
	node = None
	subargs = {'c': 0, 'a': []}
	if s[1] is not None:
		ref = s[1]
		if s[4] is not None:
			if ref + '|' * len(s[4]) in Data:
				ref += '|' * len(s[4])
				subargs = {'c': len(s[4]), 'a': s[4]}
			else:
				for a in range(len(s[4]), 0, -1):
					if ref + '|' * a + '...' in Data:
						ref += '|' * a + '...'
						break
				else:
					raise KeyError("Node " + ref + " with "
						+ str(len(s[4])) + " arguments not found")
				subargs = {'c': a, 'a': s[4]}
		# print(ref)
		node = copy.deepcopy(Data[ref])
		if s[2]:
			_ = re.match(sep, s[2])
			if _:
				_ = _.end()
			flist = re.split(sep, s[2][_:])
			# NYI
			# mode = "assign"
			for i in range(min(len(flist), len(node))):
				if flist[i][0] == "*":
					# mode = "multiply"
					flist[i] = flist[i][1:]
				elif flist[i][0] == "+":
					# mode = "add"
					flist[i] = flist[i][1:]
				elif flist[i][0] == "=":
					# mode = "assign"
					flist[i] = flist[i][1:]
				d = float(flist[i])
				node[i]['freq'] = d
				# nstr += ','+str(d)
			# Data[nstr] = node
		elif s[3]:
			# print(s[3])
			_ = re.match(sep2, s[3])
			if _:
				_ = _.end()
			ilist = re.split(sep2, s[3][_:])
			# print(ilist)
			node2 = []
			for i in ilist:
				ival = i.split(":")
				# print(node)
				node2.append(node[int(ival[0])])
				if len(ival) > 1:
					node2[-1]['freq'] = float(ival[1])
			node = node2
	return (s[0], node, subargs)


def showNodes(Data, nodes, depth=-16, maxDepth=16):
	for s in refParse(nodes):
		_, node, args = processRef(Data, s)
		if args:
			onode = copy.deepcopy(node)
			for x in node:
				oldliteral = copy.deepcopy(x)
				if args:
					for ch in x:
						if isinstance(x[ch], str):
							try:
								x[ch] = argsExpand(x[ch], args, -depth, maxDepth)
							except ValueError as err:
								err.args += x,
								raise
			x["val"] = x.get("val", "")
			try:
				x["freq"] = float(x.get("freq", one))
			except ValueError as err:
				err.args += args, {'Original': oldliteral}, x,
				raise
			if x["freq"] < 0:
				x["freq"] = 0
			# Ensure that every channel named exists
			for ch in channels:
				x[ch] = x.get(ch, "")
			print("Original: ", onode)
		print("Computed: ", node)


def chooseFrom(Data, branches, depth=-16, maxDepth=16, args=None):
	"""Select a random value from the branches, recursing
		on references.
		This function implements the essential algorithm of wordgen.
	"""
	global expansionCount
	specialChannels = set(["val", "freq", "path"])
	# print(branches)
	if args is None:
		args = {'c': 0, 'a': []}

	if isinstance(branches, dict):
		branches["val"] = branches.get("val", "")
		branches["freq"] = one
		branches["path"] = None
		return branches
	expansionCount += 1
	obranch = copy.deepcopy(branches)
	for x in branches:
		oldliteral = copy.deepcopy(x)
		for ch in x:
			if isinstance(x[ch], str):
				try:
					x[ch] = argsExpand(x[ch], args, maxDepth + depth, maxDepth)
				except ValueError as err:
					err.args += x,
					raise
	# print(branches)
	# for x in branches:
		x["val"] = x.get("val", "")
		try:
			x["freq"] = float(x.get("freq", one))
		except ValueError as err:
			err.args += args, {'Original': oldliteral}, x,
			raise
		if x["freq"] < 0:
			x["freq"] = 0
		# Ensure that every channel named exists
		for ch in channels:
			x[ch] = x.get(ch, "")
	branchesSum = sum([x["freq"] for x in branches])
	if not branchesSum:
		rets = {
			"val": "",
			"path": [0],
			"freq": 1
		}
		for ch in channels:
			rets[ch] = rets.get(ch, "")
		return rets
	a = float(random.uniform(0, float(branchesSum)))
	stop = 0
	# This needs no normalization because values are never directly compared.
	for i, c in enumerate([x["freq"] for x in branches]):
		a -= c
		if a <= 0:
			stop = i
			break

	obranch = obranch[stop]
	other_channels = (
		set([_ for _ in branches[stop]]) - specialChannels
	)
	if "path" in branches[stop]:
		pass
	if expansionCount >= maxDepth ** 2:
		# Expansion limit reached
		print("wordgen.py: expansion limit reached", file=sys.stderr)
		rets = {
			"val": branches[stop]["val"],
			"path": [0],
			"freq": branches[stop]["freq"] / branchesSum
		}
		for ch in other_channels:
			rets[ch] = branches[stop].get(ch, "")
		return rets
	elif depth >= 0:
		# Recursion limit reached
		print("wordgen.py: depth limit reached", file=sys.stderr)
		rets = {
			"val": branches[stop]["val"],
			"path": [0],
			"freq": branches[stop]["freq"] / branchesSum
		}
		for ch in other_channels:
			rets[ch] = branches[stop].get(ch, "")
		return rets
	rets = {"val": "", "freq": one / branchesSum, "path": [stop]}
	# If val is empty, simply return the other channels
	if not branches[stop]["val"]:
		for ch in other_channels:
			rets[ch] = branches[stop].get(ch, "")
		return rets
	# Determine which is a string and which is a reference
	# print(branches[stop]["val"])
	# print(list(refParse(branches[stop]["val"])))
	try:
		for s in refParse(branches[stop]["val"]):
			# Recurse on reference and insert results into string
			# print(s)
			text, node, subargs = processRef(Data, s)
			if text:
				rets["val"] = rets["val"] + text
				for ch in other_channels:
					rets[ch] = rets.get(ch, "") + branches[stop].get(ch, "")
			if node:
				# Throws a KeyError on invalid reference. Not caught
				# because the Python default error message is good
				# enough and there's nothing for the code to do with
				# an error.

				# Fill reference
				tmp = chooseFrom(Data, node, depth + 1, maxDepth, subargs)
				other_channels.update(
					set([_ for _ in tmp]) - specialChannels
				)

				rets["val"] = rets["val"] + tmp["val"]
				rets["freq"] = rets["freq"] * tmp["freq"]
				rets["path"].append(tmp["path"])

				for ch in other_channels:
					rets[ch] = rets.get(ch, "") + tmp.get(ch, "")
	except ValueError as err:
		err.args += args, obranch,
		raise
	return rets


def filterRE(RE):
	"""Processes regex from file for use. Currently no-op."""
	return RE


def applyRE(Data, word, keepHistory=False, KHSep=" → "):
	"""Applies regular expressions in Data to word."""

	def doStagedMatchReplace(regexes, word, fullword):

		def defaultPlaceholder(defStr, c):
			# return aChar.sub(str, c)
			out = ""
			for t in refParse(defStr):
				out += t[0]
				if t[1] is not None:
					out += c
			return out

		def matchesSet(set, c):
			return True if tr(set, "", c, "cd") else False

		def doMaps(maps, matches, c):
			def doFSMMatch(map1, map2, c, S):
				if tr(map1, "", c, "cd"):
					return (True, tr(map1, map2, c), S)
				return (False, "", None, None)

			for map in maps:
				m = doFSMMatch(map[0], map[1], c, map[2] if len(map) > 2 else None)
				if m[0]:
					return (m[1], m[2])
			for match in matches:
				if matchesSet(match[0], c):
					return (c, match[1])
			return False

		ret = [word]
		for stage in regexes:
			if isinstance(stage, dict) and "S" in stage:
				# Order of Operations:
				# Most specific rule first:
				# 1. character rules
				# 2. set rules
				# 	like generalized character rules
				# 3. map rules
				# 4. match rules (like maps with set2 = set1)
				# 5. default rule
				# 6. return rule
				# 	(equivalent to default: ["{}", <dest>])

				# pdb.set_trace()
				state = "S"
				cline = ""
				# print("begin: "+ret[-1])
				if "reversed" in stage and stage["reversed"] & 1:
					ret[-1] = ret[-1][::-1]
				for c in ret[-1]:
					s = stage[state]
					m = doMaps(s.get("map", []), s.get("match", []), c)
					if c in s:
						r = s[c]
						cline += defaultPlaceholder(r[0], c)
						if len(r) > 1:
							state = r[1]
					elif "set" in s:
						for r in s["set"]:
							if matchesSet(r[0], c):
								cline += defaultPlaceholder(r[1], c)
								if len(r) > 2:
									state = r[2]
					elif m:
						cline += m[0]
						if m[1]:
							state = m[1]
					elif "default" in s:
						r = s["default"]
						cline += defaultPlaceholder(r[0], c)
						if len(r) > 1:
							state = r[1]
					else:
						cline += c
						if "return" in s:
							state = s["return"]
				if "end" in stage[state]:
					cline += stage[state]["end"]
				if "reversed" in stage and stage["reversed"] & 1:
					# unreverse the input
					ret[-1] = ret[-1][::-1]
				if "reversed" in stage and stage["reversed"] & 2:
					cline = cline[::-1]
				ret.append(cline[:])
			elif isinstance(stage, dict) and "normalize" in stage:
				form = stage["normalize"]
				if form == "default":
					form = "NFC"
				ret.append(normalize(form.upper(), ret[-1]))
			elif isinstance(stage, dict) and "repeat" in stage:
				pass
			elif isinstance(stage, list) and len(stage) > 0 and "m" in stage[0]:
				for rule in stage:
					if "c" in rule:
						# not continue because rules are never added
						break
					rule["c"] = re.compile(filterRE(rule["m"]))
				cline = ret[-1]
				for rule in stage:
					cline = rule["c"].sub(rule["r"], cline)
				ret.append(cline[:])
			elif "assign" in stage:
				tmp = ""
				for ref in refParse(stage["assign"]):
					tmp += ref[0]
					if ref[1] is not None:
						tmp += fullword.get(ref[1], "")
				ret.append(tmp)
			else:
				print("replace stage invalid: {0!r}".format(stage), file=sys.stderr)
		return ret

	ret = {}
	if "replace" in Data:
		assert "path" not in Data["replace"], \
			"path is not a valid channel for replacement rules"
		for channel in Data["replace"]:
			# if channel in word:
			ret[channel] = (
				doStagedMatchReplace(
					Data["replace"][channel], word.get(channel, ""), word
				)
			)
	else:  # Compatibility
		if "replacement" in Data:
			ret["val"] = (
				doStagedMatchReplace(
					Data["replacement"], word["val"], word
				)
			)
		if "replaceIPA" in Data:
			ret["ipa"] = (
				doStagedMatchReplace(
					Data["replaceIPA"], word["ipa"], word
				)
			)
	if keepHistory:
		for channel in ret:
			word[channel] = KHSep.join(ret[channel])
	else:
		for channel in ret:
			word[channel] = ret[channel][-1]
	return word


def listAll(Data, node, opts={
	"ipa": True,
	"HTML": False,
	"path": False,
	"depth": -16,
	"keepHistory": False,
	"keepHistorySep": "→",
	"ignoreZeros": True
}):
	'''Traverse all descendants of node'''

	def listWords(Data, node, depth, opts, path=[], flist=None):
		pass

	def nextPath(Data, node, path):

		return path

	global expansionCount
	# tmpbuf = []
	ret = DFSPrint(listAllR(
		Data, node, opts["depth"], opts["ignoreZeros"]
	))
	for word in ret:
		yield formatWord(
			applyRE(Data, {
				"val": word[0],
				"ipa": word[1],
				# DFSPrint doesn't work with paths
				"path": [0],
				"freq": word[2]}), opts)
		expansionCount = 0
		# newword = applyRE(Data, {"val":word[0], "ipa":word[1]})
		# word = (newword["val"], newword["ipa"], word[2])
		# tmpbuf.append(word[0]+' :\t'+word[1]+'\t'+str(word[2]))
		time.sleep(0.0001)
	# return '\n'.join(tmpbuf)


def listAllR(Data, node, depth, ignoreZeros, path=[], flist=None):
	'''Implementation of listAll. Do not call.'''
	if node in path:
		return {"t": 'V', "node": node}
	elif depth < 0:
		path.append(node)
		list = []
		if not flist:
			flist = [float(x.get("freq", one)) for x in Data[node]]
		listSum = sum(flist)
		if ignoreZeros:
			for i in range(len(Data[node])):
				if i < len(flist):
					# Copy Data[node][i] so that Data is not altered
					N = dict(Data[node][i])
					N["freq"] = flist[i]
					if flist[i]:
						list.append(N)
				else:
					if Data[node][i].get("freq"):
						list.append(Data[node][i])
		else:
			list = Data[node]

		matches = []
		for child in list:
			# 1+ elements are strings and 1+ elements are references to arrays
			# Determine which is a string and which is a reference
			matches.append({
				"t": 'A',
				"freq": child.get("freq", one) / listSum,
				"Acontents": []
			})
			# If no val, insert IPA anyway
			if not child.get("val", ""):
				matches[-1]["Acontents"].append(
					{"t": 'L', "val": '', "ipa": child.get("ipa", "")}
				)
			else:
				for s in refParse(child["val"]):
					# Recurse on reference and insert results into string
					if s[1]:
						nstr = s[1]
						node = Data[s[1]]
						if s[2]:
							_ = re.match('[^0-9.]+', s[2])
							if _:
								_ = _.end()
							flist = re.split('[^0-9.]+', s[2][_:])
							nstr = s[1]
							for i in range(min(len(flist), len(node))):
								d = float(flist[i])
								node[i]['freq'] = d
								# Flist.append(d)
								nstr += ',' + str(d)
							if nstr not in Data:
								Data[nstr] = node
						else:
							flist = None
						# Throws a KeyError on invalid reference. Not caught because
							# the Python default error message is good enough and
							# there's nothing for the code to do with an error.
						# Fill reference
						tmp = listAllR(Data, nstr, depth + 1, ignoreZeros, path, None)
						if s[0]:
							# If reference+literal text, insert
							matches[-1]["Acontents"].append({
								"t": 'L',
								"val": s[0],
								"ipa": child.get("ipa", "")
							})
							matches[-1]["Acontents"].append(tmp)
						else:
							matches[-1]["Acontents"].append(tmp)
					# No reference, only literal text
					else:
						matches[-1]["Acontents"].append({
							"t": 'L',
							"val": s[0],
							"ipa": child.get("ipa", "")
						})
		# path.pop()
		return {"t": 'N', "node": node, "sum": listSum, "Ncontents": matches}
	else:
		# Recursion depth reached
		print("wordgen.py: recursion depth reached", file=sys.stderr)
		return {"t": 'T', "node": node, "raw": Data[node]}


def DFSPrint(Node, freq=1):
	'''Generate list of words suitable for printing from tree structure.'''
	# Main case
	def f_A(Node, freq):
		buf1 = [("", "", 1)]
		for n in Node["Acontents"]:
			# tfreq = freq*Node["freq"]
			buf2 = DFSPrint(n, freq)
			# print('n: '+str(Node))
			# print('1: '+str(buf1))
			# print('2: '+str(buf2))
			buf3 = []
			for i in buf1:
				for j in buf2:
					# print('---\ni: '+str(i)+'\nj:'+str(j)+'\n---')
					buf3.append((i[0] + j[0], i[1] + j[1], i[2] * j[2]))
			# print('3: '+str(buf3))
			buf1 = buf3
		return buf1

	# Simply iterate and recurse
	def f_N(Node, freq):
		ret = []
		# N will always contain As
		for n in Node["Ncontents"]:
			ret.extend(DFSPrint(n, float(freq) * float(n["freq"])))
		return ret

	# Leaf
	def f_L(Node, freq):
		# print('L: '+str(path))
		return [(Node["val"], Node["ipa"], freq)]

	# Turn into reference
	def f_V(Node, freq):
		return [("{" + Node["node"] + "}", "{" + Node["node"] + "}", freq)]

	# Truncation -- pretend it's L but different
	def f_T(Node, freq):
		# print('T: '+str(freq))
		return [("{" + Node["node"] + "}", "{" + Node["node"] + "}", freq)]

	switch = {
		'A': f_A,
		'N': f_N,
		'L': f_L,
		'V': f_V,
		'T': f_T
	}
	return switch[Node['t']](Node, freq)


def formatWord(word, opts, formatStr=None):
	'''Print words'''
	if formatStr is not None:
		# dbgWord = word.copy()
		# del dbgWord["path"]
		# del dbgWord["freq"]
		# print(dbgWord)
		return formatStr.format(**word)
	else:
		if not opts["HTML"]:
			fstr = ""
			first = True
			for ch in opts["channels"]:
				if first:
					first = False
				else:
					fstr += "\t"
				fstr += "{" + ch
				if ch == "path":
					word[ch] = printPath(word.get(ch, ""))
				elif ch == "freq":
					# fstr += ":.4e"
					pass
				else:
					word[ch] = word.get(ch, "")
				fstr += "}"
		else:
			fstr = "<tr>"
			for ch in opts["channels"]:
				fstr += "<td>{" + ch + "}</td>"
				if ch == "path":
					word[ch] = printPath(word.get(ch, ""))
				elif ch == "freq":
					pass
				else:
					word[ch] = html.escape(word.get(ch, ""))
			fstr += "</tr>"
		word["val"] = word.get("val", "")
		return formatWord(word, opts, fstr)


def printPath(path):
	def recurse(path):
		ret = gmpy2.mpz(path[0]).digits(62)
		for a in path[1:]:
			if a:
				ret = ret + '[' + (recurse(a)) + ']'
		return ret
	if path:
		return '+' + recurse(path)
	else:
		return '+0'


def readPath(pathStr):
	# Simple token separator function - recognizes + [ ] and alphanumerics
	def tokensOf(pathStr):
		ret = ""
		inInt = False
		for c in pathStr:
			if c in "[]":
				inInt = False
				if ret:
					yield ret
				ret = c
			elif c in "+":
				pass
			elif c in "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz":
				# Ints are in base 62
				# Int tokens can be multiple characters
				if inInt:
					ret = ret + c
				else:
					inInt = True
					if ret:
						yield ret
					ret = c
			else:
				# This shouldn't be hit -- raise ValueError?
				pass
		return ret

	def constructPath(tokens, i=0):
		ret = []
		while i < len(tokens):
			if tokens[i] == "[":
				tret, ti = constructPath(tokens, i + 1)
				ret.append(tret)
				if ti == len(tokens):
					pass
					# raise ValueError(
					# 	 "Unterminated subpath",
					# 	 i,
					# 	 str(tokens),
					# 	 str(tokens[i:])
					# )
				i = ti
			elif tokens[i] == "]":
				return ret, i
			else:
				ret.append(int(gmpy2.mpz(tokens[i], 62)))
			i += 1
		return ret, i
	try:
		return constructPath(list(tokensOf(pathStr)))[0]
	except ValueError as err:
		# In case of error, report the full path in addition to the subpath
		err.args = err.args + (pathStr, )
		raise


def followPath(Data, node, path):
	# print(node)
	# root = [{
	# 		 "val": x["val"],
	# 		 "ipa": x.get("ipa",""),
	# 		 "freq":float(x.get("freq",one))
	# 	} for x in Data[node] if x.get("freq",one)
	# ]
	sumFreq = sum(
		[float(x.get("freq", one)) for x in Data[node]]
	)
	SNode = {
		"val": Data[node][path[0]].get("val", ""),
		"ipa": Data[node][path[0]].get("ipa", ""),
		"freq": (float(Data[node][path[0]].get("freq", one)) / sumFreq)
	}
	rets = {"val": "", "ipa": "", "freq": one}
	# print(SNode)
	for i, s in enumerate(refParse(SNode["val"])):
		# Recurse on reference and insert results into string
		if s[1]:
			# Throws a KeyError on invalid reference. Not caught because
			# 	the Python default error message is good enough and there's
			# 	nothing for the code to do with an error.
			# Fill reference
			tmp = {"val": "", "ipa": "", "freq": one}
			if i + 1 < len(path):
				tmp = followPath(Data, s[1], path[i + 1])

			rets["val"] = rets["val"] + s[0] + tmp["val"]
			rets["freq"] = rets["freq"] * tmp["freq"]
			if s[0]:
				# If reference+literal text, insert
				rets["ipa"] = rets["ipa"] + SNode["ipa"] + tmp["ipa"]
			else:
				rets["ipa"] = rets["ipa"] + tmp["ipa"]
		# No reference, only literal text
		else:
			rets["val"] = rets["val"] + s[0]
			rets["ipa"] = rets["ipa"] + SNode["ipa"]
	return rets


def toBNF(Data, StartDef):
	nodes = Data.copy()
	for N in set(["replace", "replaceIPA", "replacement", "channels"]):
		nodes.pop(N)
	pass


def genPathCallback(option, opt, value, parser):
	parser.values.genFrom = True
	try:
		parser.values.genPath = readPath(value)
	except ValueError as err:
		raise OptionValueError(*err.args) from err


def main():
	global expansionCount

	# print(list(refParse("{} test")))

	# Enable shorthand for decimal numbers:
	def dec_repr(dumper, data):
		return dumper.represent_scalar(u'!d', 'd' + str(data))

	yaml.Dumper.add_representer(float, dec_repr)
	yaml.SafeDumper.add_representer(float, dec_repr)
	yaml.Loader.add_implicit_resolver(u'!d', re.compile(r'd\d*\.?\d+'), ['d'])
	yaml.SafeLoader.add_implicit_resolver(u'!d', re.compile(r'd\d*\.?\d+'), ['d'])

	def dec_cons(loader, node):
		return float(loader.construct_scalar(node)[1:])

	yaml.Loader.add_constructor(u'!d', dec_cons)
	yaml.SafeLoader.add_constructor(u'!d', dec_cons)

	# Command-line options
	parser = optparse.OptionParser(
		usage="usage: %prog [options] <datafile> <root> [command]\n"
		"  [command] may be either 'gen' [default], 'list', 'show', or 'diag'")

	genGroup = optparse.OptionGroup(parser, "Options for gen")
	listGroup = optparse.OptionGroup(parser, "Options for list")
	diagGroup = optparse.OptionGroup(parser, "Options for diag")
	showGroup = optparse.OptionGroup(parser, "Options for show")
	debugGroup = optparse.OptionGroup(parser, "Debugging options")

	parser.add_option("-c", "--channel", dest="channels",
		action="append", metavar="CHANNEL", default=[],
		help="print CHANNEL (can be used multiple times)")
	parser.add_option("-p", "--ipa", dest="channels",
		action="append_const", const="ipa",
		help="print IPA transcriptions (-c ipa)")
	parser.add_option("-d", "--depth", dest="depth",
		type="int", default=16,
		help="maximum recursion depth [default: %default]")
	parser.add_option("-H", "--html", dest="HTMLmode",
		action="store_true", default=False,
		help="write output as HTML table")
	genGroup.add_option("-n", dest="num",
		type="int", default=1, metavar="numWords",
		help="number of words to generate")
	genGroup.add_option("-V", dest="noVal",
		action="store_true", default=False,
		help="Suppress implicit 'val' printing")
	genGroup.add_option("-q", "--quiet", dest="quiet",
		action="store_true", default=False,
		help="Disable printing of the header")
	# Never implemented, and potentially unsafe
	# genGroup.add_option("-F", "--fmt", dest="fstr",
	# 	type="string", metavar="FMT_STR", default=[],
	# 	help="Format string for printing words")
	parser.add_option_group(genGroup)

	listGroup.add_option("-0", "--listZeros", dest="ignoreZeros",
		action="store_false", default=True,
		help="include 0-frequency values in list")
	parser.add_option_group(listGroup)

	showGroup.add_option("-D", "--startdepth", dest="showStartDepth",
		type="int", default=0,
		help="Starting depth")
	parser.add_option_group(showGroup)

	diagGroup.add_option("--regex", dest="dbgRE",
		action="store_true", default=False,
		help="Dump regular expressions after filtering.")
	diagGroup.add_option("--nodes", dest="dbgNodes",
		action="store_true", default=False,
		help="Dump switching nodes after filtering.")
	diagGroup.add_option("--retest", dest="dbgRETest",
		action="store_true", default=False,
		help="Apply regexes for CHANNELS to input.")
	diagGroup.add_option("--bnf", dest="dbgBNFExport",
		action="store_true", default=False,
		help="Export to BNF (val only).")
	parser.add_option_group(diagGroup)

	debugGroup.add_option("-P", "--path", dest="channels",
		action="append_const", const="path",
		help="print paths for generated words (-c path)")
	debugGroup.add_option("-K", "--keepHistory", dest="keepHistory",
		action="store_true", default=False,
		help="save every step of regex application\n"
		"May be hard to read.")
	debugGroup.add_option("--KHSep", dest="KHSep",
		type="string", default=" → ", metavar="SEP",
		help="what to insert between regex applications")
	debugGroup.add_option("-r", "--seed", dest="seed",
		action="store", default=None,
		help="random seed")
	debugGroup.add_option("-f", dest="channels",
		action="append_const", const="freq",
		help="show calculated frequencies (-c freq)")
	debugGroup.add_option("-G", "--generate", dest="genFrom",
		action="store", default=None,
		type="string", metavar="PATH",
		help="Generate a sentence from a path string.")
	parser.add_option_group(debugGroup)

	(options, args) = parser.parse_args()

	if len(args) < 2:
		parser.error("Not enough arguments")
	if len(args) < 3:
		args.append("gen")
	if "ipa" in options.channels:
		options.IPAmode = True
	else:
		options.IPAmode = False
	if "path" in options.channels:
		options.path = True
	else:
		options.path = False
	if "freq" in options.channels:
		options.showFreqs = True
	if not options.noVal:
		options.channels = ['val'] + options.channels
	opts = {
		"HTML": options.HTMLmode,
		"path": options.path,
		"depth": -1 * options.depth,
		"keepHistory": options.keepHistory,
		"keepHistorySep": options.KHSep,
		"ignoreZeros": options.ignoreZeros,
		"channels": options.channels,
		"genFrom": options.genFrom,
	}
	random.seed(options.seed)
	Data = yaml.safe_load(open(args[0], 'r', encoding="utf8"))
	if args[2] == "gen":
		Header = ""
		# Default some channel names for printing
		channels = {"val": "Words", "ipa": "IPA", "path": "Path"}
		if "channels" in Data:
			for ch, name in Data["channels"].items():
				channels[ch] = name
		if options.HTMLmode:
			Header = "<table><tr>"
			for ch in options.channels:
				Header += "<th>" + html.escape(channels.get(ch, ch)) + "</th>"
			Header += "</tr>"
			print(Header)
		else:
			if not options.quiet:
				Header += '\t'.join([
					channels.get(ch, ch) for ch in options.channels
				])
				print(Header)
				print('-' * 40)
		if list(refParse(args[1]))[0][1] is not None:
			Data[":arg"] = [{"val": args[1]}]
		else:
			Data[":arg"] = [{"val": "{" + args[1] + "}"}]
		try:
			for _ in range(options.num):
				expansionCount = 0
				if opts["genFrom"] is not None:
					raw_word = followPath(Data, ":arg", readPath(opts["genFrom"]))
				else:
					raw_word = chooseFrom(
						Data,
						Data[":arg"],
						-1 * options.depth - 1,
						options.depth
					)
				word = applyRE(
					Data,
					raw_word,
					options.keepHistory,
					options.KHSep
				)
				print(formatWord(word, opts))
		finally:
			if options.HTMLmode:
				print("</table>")
	elif args[2] == "list":
		for word in listAll(Data, args[1], opts):
			print(word)
	elif args[2] == "xform":
		word = yaml.safe_load(args[1])
		print(formatWord(applyRE(
			Data,
			word,
			options.keepHistory,
			options.KHSep
		), opts))
	elif args[2] == "show":
		showNodes(Data, args[1], options.depth + options.showStartDepth,
			options.depth)
	elif args[2] == "diag":
		if options.dbgRE:
			if "replace" in Data:
				for channel in Data["replace"]:
					print(channel + ':')
					for stage in Data["replace"][channel]:
						print('  [')
						for rule in stage:
							print(
								'    {'
								+ "m: {m}, r: {r}".format(
									m=repr(filterRE(rule['m'])),
									r=repr(rule['r'])
								) + '}'
							)
						print('  ]')
		if options.dbgNodes:
			pass
			# G = SwitchingGraph(Data)
			# G.addNode(":arg", [{"val": args[1]}])
			# print(repr(G))
			# print(repr(G[":arg"]))
		if options.dbgRETest:
			for ch in options.channels:
				pass  # NYI
		if options.dbgBNFExport:
			print('-' * 40)
			print(toBNF(Data, args[1]))
			print('-' * 40)
		if options.genFrom is not None:
			print(repr(readPath(options.genFrom)))
		else:
			for s in refParse(branches[stop]["val"]):
				pass


main()