Markov Models

xiaoxiao2022-07-03 216

#!/usr/bin/python
# encoding: utf-8
"""
Generate pseudo-random text with a first-order Markov chain of words.

The script downloads a speech, records how often each word is followed by
each other word, and then walks those counts to emit a chain of words.

@author: LlQ
@contact:LIQINGLIN54951@gmail.com
@file：cp9_p178.py
@time: 5/19/2019 7:00 PM
"""
from urllib.request import urlopen
from random import randint

# Text used to train the chain when the file is run as a script.
SPEECH_URL = 'http://pythonscraping.com/files/inaugurationSpeech.txt'

# Punctuation marks that are treated as stand-alone "words" in the chain.
PUNCTUATION = (',', '.', ';', ':')


def wordListSum(wordList):
    """Return the sum of all counts in *wordList* (a ``{word: count}`` dict)."""
    return sum(wordList.values())


def retrieveRandomWord(wordList):
    """Pick a word from *wordList* at random, weighted by its count.

    Args:
        wordList: dict mapping each candidate word to its occurrence count.

    Returns:
        One key of *wordList*; a key with a larger count is proportionally
        more likely to be chosen.

    Raises:
        ValueError: if the counts sum to less than 1 (e.g. an empty dict),
            because there is nothing to draw from.
    """
    total = wordListSum(wordList)
    if total <= 0:
        raise ValueError('wordList must contain at least one positive count')

    # Draw a position in 1..total, then walk the words subtracting each
    # count until the position is used up; the word that exhausts it wins.
    randIndex = randint(1, total)
    for word, value in wordList.items():
        randIndex -= value
        if randIndex <= 0:
            return word


def buildWordDict(text):
    """Build the word-transition table for *text*.

    Double quotes are removed and each of ``, . ; :`` is split off as its own
    token so that punctuation takes part in the chain.  Tokens are separated
    on any run of whitespace (spaces, newlines, tabs, carriage returns).

    Args:
        text: the training text.

    Returns:
        A dict of dicts: ``result[a][b]`` is the number of times token ``b``
        immediately follows token ``a``.  A token that is never followed by
        anything (such as the very last one) gets no top-level entry.
    """
    text = text.replace('"', '')

    # Put spaces around punctuation so split() yields it as separate tokens.
    for symbol in PUNCTUATION:
        text = text.replace(symbol, ' {} '.format(symbol))

    # split() with no argument drops empty strings and handles all whitespace.
    words = text.split()

    wordDict = {}
    for previous, current in zip(words, words[1:]):
        followers = wordDict.setdefault(previous, {})
        followers[current] = followers.get(current, 0) + 1
    return wordDict


def generateChain(wordDict, startWord, length):
    """Walk *wordDict* from *startWord* and return the visited words.

    Args:
        wordDict: transition table as returned by ``buildWordDict``.
        startWord: first word of the chain; it is always included.
        length: maximum number of words to append after *startWord*.

    Returns:
        A list of at most ``length + 1`` words.  The walk stops early when the
        current word has no recorded successor, instead of raising KeyError.
    """
    chain = [startWord]
    for _ in range(length):
        followers = wordDict.get(chain[-1])
        if not followers:
            # Dead end: this word never precedes another word in the text.
            break
        chain.append(retrieveRandomWord(followers))
    return chain


def main():
    """Download the speech, build the table and print a 100-step chain."""
    # The context manager closes the HTTP response once it has been read.
    with urlopen(SPEECH_URL) as response:
        text = response.read().decode('utf-8')

    wordDict = buildWordDict(text)
    chain = generateChain(wordDict, 'Called', 100)
    print(' '.join(chain))


if __name__ == '__main__':
    main()

最新回复(0)