Last active
November 17, 2024 21:03
-
-
Save m-sterling/93712042f2fecd5dd8f6efd9d8cf1e3d to your computer and use it in GitHub Desktop.
Basic Markov chain using your Discord messages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Basic Markov chain | |
| # | |
| # | |
| # Prerequisites: | |
| # | |
| # - Discord data package | |
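| # - extract data package (specifically `messages/`) and | |
| # run the following command in this `messages/` directory: | |
| # $ for i in *; do mv $i/messages.json $i.json && rm -rf $i/; done | |
| # - make the resulting `*.json` files available in `./discord` (relative to | |
| # the working directory), which is where this script reads them from | |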
| import os, json, string, time, re, random | |
# When true, load_knowledge() prints start/end timestamps, the elapsed time and
# the number of entries whenever it has to rebuild the knowledge table.
DEBUG = True
# Generation in main() stops once the line holds this many space-separated
# tokens; the tokens typed by the user count toward the limit.
MAX_LENGTH = 25
def start_learning(input_dir='./discord'):
    """Build the Markov knowledge table from exported message files.

    Every ``*.json`` file directly inside ``input_dir`` (except
    ``index.json``) is parsed as a JSON list of message objects, and each
    object's ``'Contents'`` text is fed to ``learn``.

    Args:
        input_dir: Directory containing the per-channel JSON files.
            Defaults to ``'./discord'``.

    Returns:
        dict: ``{word: {next_word: count}}``. The empty-string key ``''``
        holds the counts of words that start a message.

    Raises:
        OSError: If ``input_dir`` or one of its files cannot be read.
        ValueError: If a file is not valid UTF-8 encoded JSON
            (``json.JSONDecodeError`` is a ``ValueError``).
    """
    knowledge = {'': {}}

    for file_name in os.listdir(input_dir):
        if not file_name.endswith('.json') or file_name == 'index.json':
            continue

        # Read as UTF-8 explicitly: message text routinely contains
        # non-ASCII characters and the platform default encoding may not
        # be able to decode them.
        path = os.path.join(input_dir, file_name)
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for msg in data:
            contents = msg.get('Contents')
            # Messages without text contribute nothing to the chain.
            if contents:
                learn(knowledge, contents)

    return knowledge
# Discord markup tokens: custom emoji, user/role mentions, channel references.
# https://redd.it/iibxms/
_MENTION_RE = re.compile(r'<(?::\w+:|@!*&*|#)[0-9]+>')
# Any single ASCII punctuation character.
_PUNCTUATION_RE = re.compile('[' + re.escape(string.punctuation) + ']')


def learn(knowledge, message):
    """Update ``knowledge`` in place with the word transitions of one message.

    The message is split on whitespace. Tokens that look like Discord markup
    or that contain any ASCII punctuation character are discarded entirely;
    the remaining tokens are lower-cased. The first remaining word is counted
    once under the ``''`` key (the table of message-starting words), and every
    adjacent pair ``(w1, w2)`` increments ``knowledge[w1][w2]``.

    Args:
        knowledge: Mapping ``{word: {next_word: count}}`` to update.
        message: Raw message text. Empty or ``None`` is ignored.

    Returns:
        None. ``knowledge`` is mutated.
    """
    if not message:
        return

    words = [
        word.lower()
        for word in message.split()
        if not (_MENTION_RE.search(word) or _PUNCTUATION_RE.search(word))
    ]
    if not words:
        return

    # Count the opening word exactly once per message, whether the message
    # has one word or many.
    starts = knowledge.setdefault('', {})
    starts[words[0]] = starts.get(words[0], 0) + 1

    # zip() against the list shifted by one yields every adjacent pair; a
    # single-word message produces no pairs.
    for current, following in zip(words, words[1:]):
        successors = knowledge.setdefault(current, {})
        successors[following] = successors.get(following, 0) + 1
def load_knowledge() -> dict[str, dict[str, int]]:
    """Return the Markov knowledge table, rebuilding it if no cache is usable.

    The table is first read from ``./knowledge.json``. If that file cannot
    be opened or does not contain valid JSON, the error is printed, the table
    is rebuilt with ``start_learning()`` and written back to the same file.
    When ``DEBUG`` is true the rebuild prints timing and size information.

    Returns:
        The knowledge table: ``{word: {next_word: count}}``.

    Raises:
        OSError: If the rebuilt table cannot be written to the cache file.
    """
    KNOWLEDGE_FILE = './knowledge.json'

    try:
        with open(KNOWLEDGE_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: file missing or unreadable.
        # ValueError: corrupt data (covers json.JSONDecodeError and
        # UnicodeDecodeError). Either way, fall through and rebuild.
        print(e)

    if DEBUG:
        debug_time_start = time.time()
        print(f'Start learning at {debug_time_start}')

    knowledge = start_learning()

    if DEBUG:
        debug_time_end = time.time()
        print(f'End learning at {debug_time_end} ({round(debug_time_end - debug_time_start, 6)}s)')
        print(f'Knowledge has {len(knowledge)} entries')

    # Cache the result so later runs skip the learning pass.
    with open(KNOWLEDGE_FILE, 'w', encoding='utf-8') as f:
        json.dump(knowledge, f)
    return knowledge
def main():
    """Run the interactive prompt: read a line, extend it, print it.

    The knowledge table is loaded once. For each input line the last
    space-separated token (lower-cased) seeds the chain; an empty line, or a
    line ending in a space, yields the seed ``''``, which selects from the
    table of message-starting words. Successor words are drawn at random,
    weighted by their counts, and appended until the current word has no
    successors or the line reaches ``MAX_LENGTH`` space-separated tokens.

    The loop ends cleanly on end-of-input (Ctrl-D) or Ctrl-C.
    """
    knowledge = load_knowledge()
    print('Ready.')

    while True:
        try:
            text = input('> ')
        except (EOFError, KeyboardInterrupt):
            # Finish the prompt line and exit without a traceback.
            print()
            break

        tokens = text.split(' ')
        x = tokens[-1].lower()
        # Each appended ' word' adds exactly one token, so keep a running
        # count instead of re-splitting the growing text every iteration.
        length = len(tokens)

        while length < MAX_LENGTH:
            successors = knowledge.get(x)
            # Stop on an unknown word, and also on an empty successor table,
            # for which random.choices would raise IndexError.
            if not successors:
                break
            x = random.choices(list(successors), weights=list(successors.values()), k=1)[0]
            text += f' {x}'
            length += 1

        print(text)
# Start the interactive prompt only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment