Created
January 9, 2021 17:02
-
-
Save ParadauxIO/0daf2d57b29cbdf7cbf6c0b772336465 to your computer and use it in GitHub Desktop.
This is a fork of the JMegaHal project by Paul James Mutton taken as Licensed GPLv1, with modifications by myself Rían Errity Licensed under GPLv3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (c) 2020, Rían Errity. All rights reserved. | |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
* | |
* This code is free software; you can redistribute it and/or modify it | |
* under the terms of the GNU General Public License version 3 only, as | |
* published by the Free Software Foundation. | |
* | |
* This code is distributed in the hope that it will be useful, but WITHOUT | |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
* version 3 for more details (a copy is included in the LICENSE file that | |
* accompanied this code). | |
* | |
* You should have received a copy of the GNU General Public License version | |
* 3 along with this work; if not, write to the Free Software Foundation, | |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
* | |
* Please contact Rían Errity <rian@paradaux.io> or visit https://paradaux.io | |
* if you need additional information or have any questions. | |
* See LICENSE.md for more details. | |
*/ | |
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.net.URL; | |
import java.util.*; | |
/** | |
* Modifications Copyright Rían Errity, 2021, https://paradaux.io | |
* Copyright Paul James Mutton, 2001-2004, http://www.jibble.org/ | |
* | |
* @author pjm2 | |
* | |
* This software is dual-licensed, allowing you to choose between the GNU | |
* General Public License (GPL) and the www.jibble.org Commercial License. | |
* Since the GPL may be too restrictive for use in a proprietary application, | |
* a commercial license is also provided. Full license information can be found at http://www.jibble.org/licenses/ | |
* | |
* */ | |
public class MarkovMegaHal { | |
// These are valid chars for words. Anything else is treated as punctuation. | |
public static final String WORD_CHARS = "abcdefghijklmnopqrstuvwxyz" | |
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
+ "0123456789"; | |
public static final String END_CHARS = ".!?"; | |
private final HashMap<String, HashSet<Quad>> words = new HashMap<>(); | |
private final HashMap<Quad, Quad> quads = new HashMap<>(); | |
private final HashMap<Quad, HashSet<String>> next = new HashMap<>(); | |
private final HashMap<Quad, HashSet<String>> previous = new HashMap<>(); | |
private final Random rand = new Random(); | |
/** | |
* MarkovMegaHal is a fork of JMegaHal maintained by Rían Errity. | |
* */ | |
public MarkovMegaHal() { | |
} | |
/** | |
* Adds an entire documents to the 'brain'. Useful for feeding in | |
* stray theses, but be careful not to put too much in, or you may | |
* run out of memory! | |
*/ | |
public void addDocument(String uri) throws IOException { | |
BufferedReader reader = new BufferedReader(new InputStreamReader(new URL(uri).openStream())); | |
StringBuffer buffer = new StringBuffer(); | |
int ch; | |
while ((ch = reader.read()) != -1) { | |
buffer.append((char) ch); | |
if (END_CHARS.indexOf((char) ch) >= 0) { | |
String sentence = buffer.toString(); | |
sentence = sentence.replace('\r', ' '); | |
sentence = sentence.replace('\n', ' '); | |
add(sentence); | |
buffer = new StringBuffer(); | |
} | |
} | |
add(buffer.toString()); | |
reader.close(); | |
} | |
/** | |
* Adds a new sentence to the 'brain.' | |
*/ | |
public void add(String sentence) { | |
sentence = sentence.trim(); | |
ArrayList<String> parts = new ArrayList<>(); | |
char[] chars = sentence.toCharArray(); | |
int i = 0; | |
boolean punctuation = false; | |
StringBuffer buffer = new StringBuffer(); | |
while (i < chars.length) { | |
char ch = chars[i]; | |
if ((WORD_CHARS.indexOf(ch) >= 0) == punctuation) { | |
punctuation = !punctuation; | |
String token = buffer.toString(); | |
if (token.length() > 0) { | |
parts.add(token); | |
} | |
buffer = new StringBuffer(); | |
//i++; | |
continue; | |
} | |
buffer.append(ch); | |
i++; | |
} | |
String lastToken = buffer.toString(); | |
if (lastToken.length() > 0) { | |
parts.add(lastToken); | |
} | |
if (parts.size() >= 4) { | |
for (i = 0; i < parts.size() - 3; i++) { | |
Quad quad = new Quad(parts.get(i), parts.get(i + 1), parts.get(i + 2), parts.get(i + 3)); | |
if (quads.containsKey(quad)) { | |
quad = quads.get(quad); | |
} else { | |
quads.put(quad, quad); | |
} | |
if (i == 0) { | |
quad.setCanStart(true); | |
} | |
if (i == parts.size() - 4) { | |
quad.setCanEnd(true); | |
} | |
for (int n = 0; n < 4; n++) { | |
String token = parts.get(i + n); | |
if (!words.containsKey(token)) { | |
words.put(token, new HashSet<>(1)); | |
} | |
HashSet<Quad> set = words.get(token); | |
set.add(quad); | |
} | |
if (i > 0) { | |
String previousToken = parts.get(i - 1); | |
if (!previous.containsKey(quad)) { | |
previous.put(quad, new HashSet<>(1)); | |
} | |
HashSet<String> set = previous.get(quad); | |
set.add(previousToken); | |
} | |
if (i < parts.size() - 4) { | |
String nextToken = parts.get(i + 4); | |
if (!next.containsKey(quad)) { | |
next.put(quad, new HashSet<>(1)); | |
} | |
HashSet<String> set = next.get(quad); | |
set.add(nextToken); | |
} | |
} | |
} | |
} | |
/** | |
* Generate a random sentence from the brain. | |
*/ | |
public String getSentence() { | |
return getSentence(null); | |
} | |
/** | |
* Generate a sentence that includes (if possible) the specified word. | |
*/ | |
public String getSentence(String word) { | |
LinkedList<String> parts = new LinkedList<>(); | |
Quad[] quads; | |
if (words.containsKey(word)) { | |
quads = (words.get(word)).toArray(new Quad[0]); | |
} else { | |
quads = this.quads.keySet().toArray(new Quad[0]); | |
} | |
if (quads.length == 0) { | |
return ""; | |
} | |
Quad middleQuad = quads[rand.nextInt(quads.length)]; | |
Quad quad = middleQuad; | |
for (int i = 0; i < 4; i++) { | |
parts.add(quad.getToken(i)); | |
} | |
while (!quad.canEnd()) { | |
String[] nextTokens = (next.get(quad)).toArray(new String[0]); | |
String nextToken = nextTokens[rand.nextInt(nextTokens.length)]; | |
quad = this.quads.get(new Quad(quad.getToken(1), quad.getToken(2), quad.getToken(3), nextToken)); | |
parts.add(nextToken); | |
} | |
quad = middleQuad; | |
while (!quad.canStart()) { | |
String[] previousTokens = (previous.get(quad)).toArray(new String[0]); | |
String previousToken = previousTokens[rand.nextInt(previousTokens.length)]; | |
quad = this.quads.get(new Quad(previousToken, quad.getToken(0), quad.getToken(1), quad.getToken(2))); | |
parts.addFirst(previousToken); | |
} | |
StringBuilder sentence = new StringBuilder(); | |
for (Object part : parts) { | |
String token = (String) part; | |
sentence.append(token); | |
} | |
return sentence.toString(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (c) 2020, Rían Errity. All rights reserved. | |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
* | |
* This code is free software; you can redistribute it and/or modify it | |
* under the terms of the GNU General Public License version 3 only, as | |
* published by the Free Software Foundation. | |
* | |
* This code is distributed in the hope that it will be useful, but WITHOUT | |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
* version 3 for more details (a copy is included in the LICENSE file that | |
* accompanied this code). | |
* | |
* You should have received a copy of the GNU General Public License version | |
* 3 along with this work; if not, write to the Free Software Foundation, | |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
* | |
* Please contact Rían Errity <rian@paradaux.io> or visit https://paradaux.io | |
* if you need additional information or have any questions. | |
* See LICENSE.md for more details. | |
*/ | |
import java.io.Serializable; | |
public class Quad implements Serializable { | |
private final String[] tokens; | |
private boolean canStart; | |
private boolean canEnd; | |
public Quad(String paramString1, String paramString2, String paramString3, String paramString4) { | |
this.canStart = false; | |
this.canEnd = false; | |
this.tokens = new String[] { paramString1, paramString2, paramString3, paramString4 }; | |
} | |
public String getToken(int paramInt) { | |
return this.tokens[paramInt]; | |
} | |
public void setCanStart(boolean paramBoolean) { | |
this.canStart = paramBoolean; | |
} | |
public void setCanEnd(boolean paramBoolean) { | |
this.canEnd = paramBoolean; | |
} | |
public boolean canStart() { | |
return this.canStart; | |
} | |
public boolean canEnd() { | |
return this.canEnd; | |
} | |
public int hashCode() { | |
return this.tokens[0].hashCode() + this.tokens[1].hashCode() + this.tokens[2].hashCode() + this.tokens[3].hashCode(); | |
} | |
public boolean equals(Object paramObject) { | |
if (!(paramObject instanceof Quad)) { | |
return false; | |
} | |
Quad quad = (Quad) paramObject; | |
return (quad.tokens[0].equals(this.tokens[0]) && quad.tokens[1].equals(this.tokens[1]) && quad.tokens[2].equals(this.tokens[2]) | |
&& quad.tokens[3].equals(this.tokens[3])); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment