Skip to content

Instantly share code, notes, and snippets.

@AntonyKapustin
Created August 11, 2015 09:04
Show Gist options
  • Save AntonyKapustin/d817900c03a7c4b257cf to your computer and use it in GitHub Desktop.
Save AntonyKapustin/d817900c03a7c4b257cf to your computer and use it in GitHub Desktop.
Read file word by word solution.
/**
 * Reads a text file, prints it sentence by sentence as lower-cased words, and finally prints
 * the longest word found.
 *
 * <p>The file is decoded as windows-1251. The text is cut into sentences with a regular
 * expression; each sentence is then split into words on runs of non-letter characters. Every
 * sentence is printed on its own line, each word lower-cased and followed by a single space.
 * The last line printed is the longest word in its original case; on ties the first one
 * encountered wins. If the file contains no words, that last line is empty.
 *
 * @param args optional; {@code args[0]} is the path of the file to read. When no argument is
 *     given the file {@code /1.txt} is read.
 * @throws IOException if the file cannot be read
 */
public static void main(String[] args) throws IOException {
    java.nio.file.Path input = args.length > 0 ? Paths.get(args[0]) : Paths.get("/", "1.txt");
    String content = new String(Files.readAllBytes(input), Charset.forName("windows-1251"));

    // A sentence starts at a non-blank, non-terminator character and runs up to a '.', '!' or
    // '?' that is followed by whitespace or end of input (optionally after a closing quote).
    // A terminator that is NOT followed by whitespace (as in "U.S" or "2.2") stays inside the
    // sentence.
    Pattern sentencePattern = Pattern.compile(
            "[^.!?\\s][^.!?]*(?:[.!?](?!['\"]?\\s|$)[^.!?]*)*[.!?]?['\"]?(?=\\s|$)",
            Pattern.MULTILINE);
    // Words are separated by any run of non-letter characters; compiled once, outside the loop.
    Pattern wordSeparator = Pattern.compile("[\\P{L}]+");

    String maxWord = "";
    Matcher sentenceMatcher = sentencePattern.matcher(content);
    while (sentenceMatcher.find()) {
        for (String word : wordSeparator.split(sentenceMatcher.group())) {
            // split() yields a leading empty token when the sentence starts with a non-letter
            // (a digit, a quote, ...); it is not a word, so skip it.
            if (word.isEmpty()) {
                continue;
            }
            // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
            System.out.print(word.toLowerCase(java.util.Locale.ROOT) + " ");
            // Strict '>' keeps the first word seen among equally long ones.
            if (word.length() > maxWord.length()) {
                maxWord = word;
            }
        }
        System.out.println();
    }
    System.out.println(maxWord);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment