Created
August 11, 2015 09:04
-
-
Save AntonyKapustin/d817900c03a7c4b257cf to your computer and use it in GitHub Desktop.
Solution for reading a file word by word.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public static void main(String[] args) throws IOException { | |
// OptionalInt max = Files.lines(Paths.get("/", "1.txt"), Charset.forName("windows-1251")) | |
// .flatMap(s -> Arrays.stream(s.split("[\\P{L}]+"))) | |
// .peek(System.out::println) | |
// .mapToInt(s -> s.length()) | |
// .max(); | |
// | |
//(max.getAsInt()); | |
String content = new String(Files.readAllBytes(Paths.get("/", "1.txt")), Charset.forName("windows-1251")); | |
//System.out.println(content); | |
// OptionalInt max = Arrays.stream(content.split("[\\P{L}]+")) | |
// .peek(System.out::println) | |
// .mapToInt(s -> s.length()) | |
// .max(); | |
// System.out.println(max.getAsInt()); | |
String maxWord = ""; | |
//String s// = "This is how I tried to split a paragraph into a sentence. But, there is a problem. My paragraph includes dates like Jan.13, 2014 , words like U.S and numbers like 2.2. They all got splitted by the above code."; | |
Pattern re = Pattern.compile("[^.!?\\s][^.!?]*(?:[.!?](?!['\"]?\\s|$)[^.!?]*)*[.!?]?['\"]?(?=\\s|$)", Pattern.MULTILINE | Pattern.COMMENTS); | |
Matcher reMatcher = re.matcher(content); | |
while (reMatcher.find()) { | |
String[] words = reMatcher.group().split("[\\P{L}]+"); | |
for(String word : words){ | |
System.out.print(word.toLowerCase() + " "); | |
if(word.length() > maxWord.length()){ | |
maxWord = word; | |
} | |
} | |
System.out.println(); | |
} | |
System.out.println(maxWord); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment