# -*- coding: utf-8 -*-
#tab-width: 3
"""Generate random words from a YAML phoneme inventory.

Usage: <script> filename numWords maxSyllables IPAmode

The data file must provide the keys read below:

* ``Onsets``, ``Vowels``, ``Codas`` -- lists of entries, each a mapping with
  ``freq`` (relative weight), ``val`` (spelling) and ``ipa`` (transcription).
* ``nullCoda`` -- a single entry of the same form, used for "no coda".

An onset/coda ``val`` may be a list of string pieces; the pieces are joined
and parsed as a format string whose ``{Name}`` fields name another top-level
list in the data file to draw a replacement from.  A vowel ``val`` may be a
two-element list: index 0 is the spelling used with the null coda, index 1
the spelling used before a real coda.
"""
import sys
import yaml
import random
import itertools
from string import *

# Arguments: filename, numWords, maxSyllables, IPAmode

random.seed()

# NOTE(review): yaml.load without an explicit Loader can construct arbitrary
# Python objects and is rejected by PyYAML >= 6.  If the data files are plain
# mappings/lists, yaml.safe_load is the safer call -- confirm before switching.
with open(sys.argv[1], 'r') as dataFile:
    data = yaml.load(dataFile)


def _weights(section):
    """Return (total, [freq, ...]) for one list of inventory entries."""
    chances = [entry["freq"] for entry in section]
    return (sum(chances), chances)


# freqs[0] = onsets, freqs[1] = vowels, freqs[2] = codas (null coda excluded).
freqs = [_weights(data["Onsets"]),
         _weights(data["Vowels"]),
         _weights(data["Codas"])]
print(str([freqs[0][0], freqs[1][0], freqs[2][0]]))


def getRandomV(maxChance, chances, vowels):
    """Pick one entry of ``vowels`` at random, weighted by ``chances``.

    maxChance MUST equal sum(chances).  ``chances[i]`` is the weight of
    ``vowels[i]``; ``vowels`` may be longer than ``chances``, in which case
    the extra entries are never selected.

    Raises ValueError if ``chances`` is empty.
    """
    if not chances:
        raise ValueError("cannot choose from an empty list of chances")
    remaining = random.uniform(0, maxChance)
    for i, chance in enumerate(chances):
        remaining -= chance
        if remaining <= 0:
            break
    # If rounding error kept the loop from breaking, i is the last index,
    # so the final entry is returned.
    return vowels[i]


def getRandomC(maxChance, chances, cons, depth=0):
    """Pick a random consonant entry, expanding template references.

    maxChance MUST equal sum(chances).

    At depth 0 an entry whose "val" is a list is treated as a template: the
    pieces are joined and parsed with string.Formatter, and every ``{Name}``
    field is replaced by a random non-template entry drawn from
    ``data[Name]``.  At depth > 0 template entries are rejected and the draw
    is repeated until a plain entry comes up (this never terminates if the
    list holds only template entries).

    Returns a new dict with the keys "val" and "ipa".
    """
    if depth:
        # Nested draw: only plain (non-list) entries are acceptable.
        while True:
            c = getRandomV(maxChance, chances, cons)
            if not isinstance(c["val"], list):
                break
        return {"val": c["val"], "ipa": c["ipa"]}

    v = getRandomV(maxChance, chances, cons)
    if not isinstance(v["val"], list):
        return {"val": v["val"], "ipa": v["ipa"]}

    # 1+ elements are strings and 1+ elements are references to arrays
    val = ""
    ipa = ""
    for literal, fieldName, _spec, _conv in Formatter().parse(''.join(v["val"])):
        if fieldName:
            # Reference: recurse on the named list and splice the result in.
            pool = data[fieldName]
            poolChances = [x["freq"] for x in pool]
            sub = getRandomC(sum(poolChances), poolChances, pool, depth + 1)
            val = val + literal + sub["val"]
            if literal:
                # Literal text before the reference contributes the entry's
                # own transcription.
                ipa = ipa + v["ipa"] + sub["ipa"]
            else:
                ipa = ipa + sub["ipa"]
        else:
            # No reference, only literal text
            val = val + literal
            ipa = ipa + v["ipa"]
    return {"val": val, "ipa": ipa}


def isDigraph(s1, s2):
    """Return True if spelling s1 directly followed by s2 is ambiguous.

    s1 is the spelling of a syllable and s2 the spelling of the syllable
    after it.  An empty s1 or s2 is never ambiguous.
    """
    if not s1 or not s2:
        return False
    # Duplicated consonants (and like vowels) are confusing
    if s1.endswith(s2[0]):
        return True
    # Vowels at end of syllable are likely to be ambiguous
    elif s1[-1] in u'aeioóuy' and s2[0] in u'aeioóu':
        return True
    # "ir" is not a valid nucleus, but all other vowel+r are ambiguous
    elif s1[-1] in 'aeou' and s2[0] == 'r':
        return True
    elif s1[-1] == 'r' and s2[0] in 'pbkgtdszfvnml':
        return True
    # "u" at end of syllable may be taken for "uh" when it should be "uu"
    elif s1[-1] == 'u' and (len(s1) > 1 and s1[-2] != 'u'):
        return True
    # May be difficult to know which syllable S's are in
    elif s1[-1] == 's' and s2[0] in 'pktnml':
        return True
    elif len(s1) > 1 and s1[-2] == 's':
        return True
    return False


def makeWords(n, maxSyls):
    """Generate a list of n words with up to maxSyls syllables.

    maxSyls must be greater than 0.  Each word is a tuple
    ``(spelling, ipa)``: syllable spellings are concatenated, with an
    apostrophe after any syllable for which isDigraph() reports an ambiguous
    boundary, and syllable transcriptions are joined with '.'.
    """
    # Coda choices including "no coda".  Built once as a NEW list so the
    # shared data["Codas"] inventory is not modified.
    nullCoda = data["nullCoda"]
    freqAll = (freqs[2][0] + nullCoda["freq"],
               freqs[2][1] + [nullCoda["freq"]])
    allCodas = data["Codas"] + [nullCoda]

    words = []
    for _ in range(n):
        cword = []
        for _ in range(random.randrange(1, maxSyls + 1)):
            sTree = []
            sIPA = []
            # Get onset
            onset = getRandomC(freqs[0][0], freqs[0][1], data["Onsets"])
            sTree.append(onset["val"])
            sIPA.append(onset["ipa"])
            # Get rhyme: nucleus first, then coda
            nucleus = getRandomV(freqs[1][0], freqs[1][1], data["Vowels"])
            coda = getRandomC(freqAll[0], freqAll[1], allCodas)
            # Select correct nucleus spelling for the coda
            if isinstance(nucleus["val"], list):
                if coda["ipa"]:
                    sTree.append(nucleus["val"][1])
                else:  # null coda
                    sTree.append(nucleus["val"][0])
            else:
                sTree.append(nucleus["val"])
            sIPA.append(nucleus["ipa"])
            sTree.append(coda["val"])
            sIPA.append(coda["ipa"])
            # TODO: a disabled draft that substituted data["Vars"]["AnyCoda"]
            # / data["Vars"]["RealCoda"] placeholders in the nucleus used to
            # live here; it was never finished.
            # Add flattened syllable to cword
            cword.append((''.join(itertools.chain.from_iterable(sTree)),
                          ''.join(itertools.chain.from_iterable(sIPA))))
        # Add apostrophes between syllables that need them
        spelling = []
        for current, following in zip(cword, cword[1:]):
            if isDigraph(current[0], following[0]):
                spelling.append(current[0] + "'")
            else:
                spelling.append(current[0])
        spelling.append(cword[-1][0])
        words.append((''.join(spelling), '.'.join([syl[1] for syl in cword])))
    return words


def printWords(words, IPAmode):
    """Print one word per line; if IPAmode is true, append ': /<ipa>/'."""
    for word in words:
        if IPAmode:
            print(word[0] + ': /' + word[1] + '/')
        else:
            print(word[0])


printWords(makeWords(int(sys.argv[2]), int(sys.argv[3])), int(sys.argv[4]))