add markov to cyberlina
This commit is contained in:
@@ -0,0 +1,72 @@
|
||||
import random
|
||||
# import re
|
||||
import json
|
||||
import os # listdir, path.join, path.isdir
|
||||
|
||||
# from collections import Counter
|
||||
|
||||
# punctuation to remove
|
||||
RM_PUNCT = u"'\"”„+-—:;()"
|
||||
|
||||
|
||||
# Markov random Text Generator
|
||||
class MTG(object):
|
||||
def __init__(self, data: dict):
|
||||
self.MC = data or {}
|
||||
|
||||
def train_from_seq(self, word_seq):
|
||||
for w1, w2, w3 in self.triples(word_seq):
|
||||
key = w1 + '+' + w2
|
||||
if key not in self.MC:
|
||||
self.MC[key] = []
|
||||
self.MC[key].append(w3)
|
||||
|
||||
def train_from_text(self, text, remove=RM_PUNCT):
|
||||
text = text.replace('.', ' . ').replace(',', ' , ')
|
||||
word_seq = text.strip().split()
|
||||
word_seq = [word.strip(remove) for word in word_seq if len(word.strip(remove)) != 0]
|
||||
self.train_from_seq(word_seq)
|
||||
|
||||
# add data form another MTG
|
||||
def __iadd__(self, other):
|
||||
for key, value in other.MC.items():
|
||||
if key not in self.MC:
|
||||
self.MC[key] = []
|
||||
self.MC[key].extend(value)
|
||||
|
||||
def save(self):
|
||||
return self.MC
|
||||
|
||||
# generate all triples to build MC
|
||||
|
||||
def triples(self, word_seq):
|
||||
if len(word_seq) < 3:
|
||||
return
|
||||
for i in range(len(word_seq) - 2):
|
||||
yield (word_seq[i], word_seq[i + 1], word_seq[i + 2])
|
||||
|
||||
def choose_start_words(self, first_word=None, second_word=None):
|
||||
if second_word is not None:
|
||||
return first_word, second_word
|
||||
elif first_word is not None:
|
||||
if len(self.MC['.' + '+' + first_word]) != 0:
|
||||
second_word = random.choice(self.MC['.' + '+' + first_word])
|
||||
elif len(self.MC['.' + '+' + first_word.capitalize()]) != 0:
|
||||
second_word = random.choice(self.MC['.' + '+' + first_word.capitalize()])
|
||||
else:
|
||||
return self.choose_start_words()
|
||||
return first_word, second_word
|
||||
else:
|
||||
first_word = random.choice(self.MC[random.choice(self.MC.keys())])
|
||||
return self.choose_start_words(first_word, second_word)
|
||||
|
||||
def generate_text(self, first_word=None, second_word=None, size=30):
|
||||
w1, w2 = self.choose_start_words(first_word, second_word)
|
||||
gen_words = [w1.capitalize()]
|
||||
while not (len(gen_words) > size and w2 == '.'):
|
||||
gen_words.append(w2.capitalize() if (w1 == '.') else w2)
|
||||
w1, w2 = w2, random.choice(self.MC[w1 + '+' + w2])
|
||||
gen_words.append(w2)
|
||||
text = ' '.join(gen_words)
|
||||
text = text.replace(' ,', ',').replace(' .', '.')
|
||||
return text
|
||||
Reference in New Issue
Block a user