Last active
August 29, 2015 14:24
-
-
Save steffiwilson/4cf1db36c84f0120d725 to your computer and use it in GitHub Desktop.
Reddit Daily Programmer Challenge 221 [Hard]: Poetry In a Haystack
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Problem statement: https://www.reddit.com/r/dailyprogrammer/comments/3bzipa/20150703_challenge_221_hard_poetry_in_a_haystack/
// English word list file: https://code.google.com/p/dotnetperls-controls/downloads/detail?name=enable1.txt
// Text file in which to find the poetry: https://gist.githubusercontent.com/anonymous/c8fb349e9ae4fcb40cb5/raw/05a1ef03626057e1b57b5bbdddc4c2373ce4b465/challenge.txt
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Finds lines of English text hidden among lines of gibberish.
 *
 * <p>The program loads a word list from {@code englishWordList.txt} (one word per line),
 * then reads {@code haystack.txt} line by line and prints every line in which recognised
 * dictionary words outnumber unrecognised ones. Finally it prints the elapsed wall-clock
 * time in milliseconds.
 */
class poetryInAHaystack {

    /** Matches every character that is not an ASCII letter; compiled once and reused per token. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-zA-Z]");

    /** Matches a run of whitespace; used to break a line into candidate words. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Program entry point: loads the dictionary, filters the haystack, reports the running time.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();

        HashSet<String> englishDictionary = new HashSet<>();
        try (BufferedReader reader = openUtf8("englishWordList.txt")) {
            String newWord;
            while ((newWord = reader.readLine()) != null) {
                // Normalise entries the same way isEnglish normalises tokens so lookups agree.
                String entry = newWord.trim().toLowerCase(Locale.ROOT);
                if (!entry.isEmpty()) {
                    englishDictionary.add(entry);
                }
            }
        } catch (IOException e) {
            // Without a dictionary every line would be classified as gibberish, so stop here
            // instead of silently producing empty output.
            System.err.println("Could not read word list englishWordList.txt: " + e);
            return;
        }

        try (BufferedReader haystackReader = openUtf8("haystack.txt")) {
            String currentLine;
            while ((currentLine = haystackReader.readLine()) != null) {
                if (isEnglish(currentLine, englishDictionary)) {
                    System.out.println(currentLine);
                }
            }
        } catch (IOException e) {
            System.err.println("Could not read haystack.txt: " + e);
        }

        long endTime = System.currentTimeMillis();
        System.out.println("Running time: " + (endTime - startTime));
    }

    /**
     * Decides whether a line of text is probably English.
     *
     * <p>The line is split on whitespace; each token has every non-ASCII-letter character
     * removed and is lower-cased (locale-independently) before being looked up in
     * {@code dictionary}. Tokens that contain no letters at all (stray punctuation, the
     * empty strings produced by repeated separators) are ignored rather than being counted
     * against the line.
     *
     * @param words      the line to classify; {@code null} is treated as not English
     * @param dictionary lower-case English words; must not be {@code null}
     * @return {@code true} if strictly more tokens are found in the dictionary than are not
     */
    public static boolean isEnglish(String words, HashSet<String> dictionary) {
        if (words == null) {
            return false;
        }
        int englishWords = 0;
        int gibberish = 0;
        for (String token : WHITESPACE.split(words)) {
            // Strip non-alphabetic chars and lowercase to match the dictionary.
            String candidate = NON_LETTERS.matcher(token).replaceAll("").toLowerCase(Locale.ROOT);
            if (candidate.isEmpty()) {
                continue; // nothing word-like in this token; it is neither English nor gibberish
            }
            if (dictionary.contains(candidate)) {
                englishWords++;
            } else {
                gibberish++;
            }
        }
        return englishWords > gibberish; // a tie (including an empty line) is not English
    }

    /** Opens the named file for buffered reading, decoding its bytes as UTF-8. */
    private static BufferedReader openUtf8(String fileName) throws IOException {
        return new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment