Skip to content

Instantly share code, notes, and snippets.

@m-sterling
Last active November 17, 2024 21:03
Show Gist options
  • Select an option

  • Save m-sterling/93712042f2fecd5dd8f6efd9d8cf1e3d to your computer and use it in GitHub Desktop.

Select an option

Save m-sterling/93712042f2fecd5dd8f6efd9d8cf1e3d to your computer and use it in GitHub Desktop.
Basic Markov chain using your Discord messages
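# Basic Markov chain text generator trained on your own Discord messages.
#
# Prerequisites:
#
# - A Discord data package.
# - Extract the data package (specifically `messages/`) and run the
#   following command inside that `messages/` directory to flatten every
#   channel folder into a single `<folder>.json` file:
#     $ for i in *; do mv $i/messages.json $i.json && rm -rf $i/; done
# - Make the resulting `.json` files available as `./discord/`, which is
#   the directory this script reads its training data from.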
import os, json, string, time, re, random
# When True, load_knowledge() prints start/end timestamps and the table size
# while it rebuilds the knowledge table.
DEBUG = True
# Generation in main() stops once the text reaches this many space-separated
# words (the words typed at the prompt count towards the limit).
MAX_LENGTH = 25
def start_learning(input_dir='./discord'):
    """Build a fresh knowledge table from the exported message files.

    Every file in `input_dir` whose name ends in `.json`, except
    `index.json`, is parsed as a JSON list of message objects, and each
    object's `'Contents'` value is fed to `learn`.

    Args:
        input_dir: Directory holding the flattened `<channel>.json` files.

    Returns:
        The knowledge table: a dict mapping a word to a dict of
        `{following word: count}`. The `''` key holds the counts of words
        that started a message.

    Raises:
        OSError: If the directory or one of its files cannot be read.
        ValueError: If a file is not valid UTF-8 encoded JSON.
        KeyError: If a message object has no `'Contents'` field.
    """
    knowledge = {'': {}}
    for name in os.listdir(input_dir):
        if not name.endswith('.json') or name == 'index.json':
            continue
        # Read as UTF-8 explicitly: messages routinely contain non-ASCII
        # text and the platform default encoding (e.g. cp1252 on Windows)
        # would mis-decode it or raise.
        with open(os.path.join(input_dir, name), 'r', encoding='utf-8') as f:
            data = json.load(f)
        for msg in data:
            learn(knowledge, msg['Contents'])
    return knowledge
# Discord markup tokens: custom emoji, user/role mentions and channel links.
# https://redd.it/iibxms/
_DISCORD_TOKEN_RE = re.compile(r'<(?::\w+:|@!*&*|#)[0-9]+>')
# NOTE: string.punctuation is interpolated unescaped, so its `\]` pair is read
# as an escaped `]`; a backslash is therefore NOT matched by this pattern.
_PUNCTUATION_RE = re.compile(rf'[{string.punctuation}]+')
# Deletes every punctuation character in one pass.
_STRIP_PUNCTUATION = str.maketrans('', '', string.punctuation)


def learn(knowledge, message):
    """Update `knowledge` in place with the word transitions in `message`.

    The message is split on whitespace. Tokens that look like Discord markup
    or that contain punctuation are discarded entirely; the remaining tokens
    are lower-cased and stripped of any leftover punctuation. The first
    remaining word is counted once as a message start under the `''` key,
    and every adjacent pair `(w1, w2)` increments `knowledge[w1][w2]`.

    Args:
        knowledge: Table mapping a word to `{following word: count}`; the
            `''` entry holds start-word counts and is created if missing.
        message: Raw message text.

    Returns:
        None. `knowledge` is mutated.
    """
    words = []
    for token in message.split():
        if _DISCORD_TOKEN_RE.search(token) or _PUNCTUATION_RE.search(token):
            continue
        word = token.lower().translate(_STRIP_PUNCTUATION)
        # A token made only of characters removed above would become '' and
        # collide with the reserved start-word key, so drop it.
        if word:
            words.append(word)
    if not words:
        return

    # Count the opening word exactly once per message, whether the message
    # has one word or many (the multi-word path used to count a first
    # sighting twice).
    starts = knowledge.setdefault('', {})
    starts[words[0]] = starts.get(words[0], 0) + 1

    for current, following in zip(words, words[1:]):
        followers = knowledge.setdefault(current, {})
        followers[following] = followers.get(following, 0) + 1
def load_knowledge() -> dict[str, dict[str, int]]:
    """Return the knowledge table, rebuilding and caching it when necessary.

    The table is first read from `./knowledge.json`. If that file cannot be
    opened or does not contain valid JSON, the error is printed, the table is
    rebuilt with `start_learning()`, written back to `./knowledge.json`, and
    returned. When `DEBUG` is true, timing and size details of the rebuild
    are printed.

    Returns:
        The knowledge table mapping a word to `{following word: count}`.

    Raises:
        OSError: If the rebuilt table cannot be written to the cache file.
        Whatever `start_learning()` raises while rebuilding.
    """
    KNOWLEDGE_FILE = './knowledge.json'
    try:
        with open(KNOWLEDGE_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: cache missing or unreadable. ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError (corrupted cache).
        print(e)

    # Rebuild outside the handler so a failure here is reported on its own
    # instead of being chained onto the cache-miss exception.
    if DEBUG:
        debug_time_start = time.time()
        print(f'Start learning at {debug_time_start}')
    knowledge = start_learning()
    if DEBUG:
        debug_time_end = time.time()
        print(f'End learning at {debug_time_end} ({round(debug_time_end - debug_time_start, 6)}s)')
        print(f'Knowledge has {len(knowledge)} entries')
    with open(KNOWLEDGE_FILE, 'w', encoding='utf-8') as f:
        json.dump(knowledge, f)
    return knowledge
def _extend_text(knowledge, text):
    """Return `text` extended with randomly chosen follow-up words.

    Starting from the lower-cased last space-separated word of `text` (or the
    start-word table under `''` when `text` is empty), repeatedly pick a
    successor weighted by its recorded count and append it, until the current
    word has no recorded successors or the text holds `MAX_LENGTH` words.
    """
    pieces = text.split(' ')
    # An empty prompt splits to [''], so it counts as one word here.
    length = len(pieces)
    current = pieces[-1].lower()
    while length < MAX_LENGTH:
        successors = knowledge.get(current)
        if not successors:
            # Unknown word, or an empty table that random.choices would
            # reject with IndexError.
            break
        current = random.choices(list(successors), weights=list(successors.values()), k=1)[0]
        # Avoid a leading space when the prompt itself was empty.
        text = f'{text} {current}' if text else current
        length += 1
    return text


def main():
    """Run the interactive prompt.

    Loads the knowledge table, prints `Ready.`, then reads lines from standard
    input and prints each one extended with generated words. The loop ends
    cleanly on end-of-file or Ctrl-C.
    """
    knowledge = load_knowledge()
    print('Ready.')
    while True:
        try:
            text = input('> ')
        except (EOFError, KeyboardInterrupt):
            # Finish the prompt line and exit without a traceback.
            print()
            break
        print(_extend_text(knowledge, text))
# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment