#tab-width: 3
import sys
import yaml
import random
import itertools
from string import *

# Arguments: filename, numWords, maxSyllables, IPAmode

# Seed the random number generator (no argument: system-provided entropy/time).
random.seed()

# Load the phonology description named by the first command-line argument.
# NOTE(security): yaml.load can construct arbitrary Python objects from tagged
# input. If the description file may come from an untrusted source, switch to
# yaml.safe_load (and confirm the file uses no Python-specific tags).
with open(sys.argv[1], 'r') as phonologyFile:
	data = yaml.load(phonologyFile)

# freqs[k] is (total weight, list of per-entry weights) for, in order,
# data["Onsets"], data["Vowels"] and data["Codas"].
freqs = []
for sectionName in ("Onsets", "Vowels", "Codas"):
	sectionWeights = [entry["freq"] for entry in data[sectionName]]
	freqs.append((sum(sectionWeights), sectionWeights))

# Show the three weight totals before any words are printed.
print(str([freqs[0][0], freqs[1][0], freqs[2][0]]))

def getRandomV(maxChance, chances, vowels):
	"""Select a random entry from vowels, weighted by chances.

	maxChance -- upper bound of the random roll; MUST equal sum(chances)
	             for every entry to be picked in proportion to its weight.
	chances   -- list of weights, chances[i] being the weight of vowels[i].
	vowels    -- list of candidate entries (returned unchanged).

	Returns the selected element of vowels.
	Raises ValueError if chances is empty.
	"""
	if not chances:
		raise ValueError("getRandomV needs at least one weighted entry")
	roll = random.uniform(0, maxChance)
	for index, weight in enumerate(chances):
		roll -= weight
		if roll <= 0:
			return vowels[index]
	# The roll outlasted every weight (float rounding, or maxChance larger
	# than sum(chances)): fall back to the entry matching the last weight.
	return vowels[len(chances) - 1]

def getRandomC(maxChance, chances, cons, depth = 0):
	"""Select a random consonant (cluster) from an input list.

	maxChance -- MUST equal sum(chances).
	chances   -- list of weights, chances[i] being the weight of cons[i].
	cons      -- list of dicts with "val" and "ipa" keys. "val" is either a
	             plain spelling or a list of pieces which, joined together,
	             form a format string whose {Name} fields refer to data[Name].
	depth     -- 0 for a top-level pick; non-zero while resolving a {Name}
	             reference, in which case only plain (non-list) entries are
	             accepted so that references never nest.

	Returns a new dict {"val": spelling, "ipa": transcription}.
	Raises ValueError if a nested pick is requested from a list that has no
	plain entry (the draw would otherwise be retried forever).
	"""
	if depth:
		if all(isinstance(entry["val"], list) for entry in cons):
			raise ValueError("referenced list has no plain entry to pick")
		# Redraw until a plain entry comes up.
		while True:
			pick = getRandomV(maxChance, chances, cons)
			if not isinstance(pick["val"], list):
				return {"val": pick["val"], "ipa": pick["ipa"]}

	pick = getRandomV(maxChance, chances, cons)
	if not isinstance(pick["val"], list):
		return {"val": pick["val"], "ipa": pick["ipa"]}

	# Some pieces are literal text and some are {Name} references: let the
	# format-string parser tell them apart.
	spelling = ""
	ipa = ""
	for literal, fieldName, _spec, _conv in Formatter().parse(''.join(pick["val"])):
		spelling += literal
		if fieldName:
			# Resolve the reference with a nested weighted pick from data[fieldName].
			pool = data[fieldName]
			poolWeights = [entry["freq"] for entry in pool]
			resolved = getRandomC(sum(poolWeights), poolWeights, pool, depth+1)
			spelling += resolved["val"]
			# The entry's own IPA accompanies literal text only.
			if literal:
				ipa += pick["ipa"]
			ipa += resolved["ipa"]
		else:
			# No reference, only literal text.
			ipa += pick["ipa"]
	return {"val": spelling, "ipa": ipa}

def isDigraph(s1, s2):
	"""Check whether writing syllable s1 directly before s2 is ambiguous.

	s1 -- spelling of the earlier syllable.
	s2 -- spelling of the syllable that follows it.

	Returns True when the letters around the boundary could be misread (the
	caller then separates the two syllables with an apostrophe), False
	otherwise. An empty s1 or s2 has no boundary letters and yields False.
	"""
	if not s1 or not s2:
		return False
	last = s1[-1]
	first = s2[0]
	hasPenult = len(s1) > 1
	# Duplicated consonants (and like vowels) are confusing
	if s1.endswith(first):
		return True
	# Vowels at end of syllable are likely to be ambiguous
	if last in u'aeioóuy' and first in u'aeioóu':
		return True
	# "ir" is not a valid nucleus, but all other vowel+r are ambiguous
	if last in 'aeou' and first == 'r':
		return True
	if last == 'r' and first in 'pbkgtdszfvnml':
		return True
	# "u" at end of syllable may be taken for "uh" when it should be "uu"
	if last == 'u' and hasPenult and s1[-2] != 'u':
		return True
	# May be difficult to know which syllable S's are in
	if last == 's' and first in 'pktnml':
		return True
	return hasPenult and s1[-2] == 's'

def makeWords(n,maxSyls):
	"""Generate a list of n words with up to maxSyls syllables each.

	n       -- number of words to generate.
	maxSyls -- maximum syllables per word; must be greater than 0
	           (random.randrange raises ValueError otherwise).

	Every syllable is an onset, a nucleus and a coda drawn by weight from
	data["Onsets"], data["Vowels"] and data["Codas"] plus data["nullCoda"].

	Returns a list of (spelling, ipa) tuples. In the spelling, an apostrophe
	follows each syllable whose boundary isDigraph() reports as ambiguous; in
	the ipa string, syllables are separated by '.'.
	"""
	# Coda candidates including the null coda. Built once, as NEW lists, so
	# that data["Codas"] and freqs are never modified by generating words.
	nullCoda = data["nullCoda"]
	allCodas = data["Codas"] + [nullCoda]
	codaTotal = freqs[2][0] + nullCoda["freq"]
	codaWeights = freqs[2][1] + [nullCoda["freq"]]

	words = []
	for _ in range(n):
		syllables = []
		for _ in range(random.randrange(1,maxSyls+1)):
			onset = getRandomC(freqs[0][0],freqs[0][1],data["Onsets"])
			nucleus = getRandomV(freqs[1][0],freqs[1][1],data["Vowels"])
			coda = getRandomC(codaTotal,codaWeights,allCodas)
			# A nucleus with a list of spellings uses the first one before a
			# null coda (empty IPA) and the second one before a real coda.
			if isinstance(nucleus["val"], list):
				nucleusSpelling = nucleus["val"][1] if coda["ipa"] else nucleus["val"][0]
			else:
				nucleusSpelling = nucleus["val"]
			spellingParts = [onset["val"], nucleusSpelling, coda["val"]]
			ipaParts = [onset["ipa"], nucleus["ipa"], coda["ipa"]]
			# Flatten the parts into one string per syllable.
			syllables.append((''.join(itertools.chain.from_iterable(spellingParts)),
				''.join(itertools.chain.from_iterable(ipaParts))))
		# Add apostrophes between syllables that need them.
		spelling = []
		for current, following in zip(syllables, syllables[1:]):
			if isDigraph(current[0], following[0]):
				spelling.append(current[0] + "'")
			else:
				spelling.append(current[0])
		spelling.append(syllables[-1][0])
		words.append((''.join(spelling), '.'.join([syl[1] for syl in syllables])))
	return words

def printWords(words, IPAmode):
	"""Print one word per line.

	words   -- sequence of entries whose element 0 is the spelling and, when
	           IPAmode is set, whose element 1 is the IPA transcription.
	IPAmode -- if true, print "spelling: /ipa/"; otherwise only the spelling.
	"""
	# A single parenthesised argument prints identically under the Python 2
	# print statement and the Python 3 print function.
	for entry in words:
		if IPAmode:
			print(entry[0] + ': /' + entry[1] + '/')
		else:
			print(entry[0])

# Script entry: argv[2] is the number of words, argv[3] the maximum number of
# syllables per word, argv[4] an integer flag (non-zero also prints the IPA).
generatedWords = makeWords(int(sys.argv[2]), int(sys.argv[3]))
printWords(generatedWords, int(sys.argv[4]))