Skip to content

Instantly share code, notes, and snippets.

@rmuhamedgaliev
Last active August 29, 2015 14:24
Show Gist options
  • Save rmuhamedgaliev/37ca6fabf529562acd7f to your computer and use it in GitHub Desktop.
Save rmuhamedgaliev/37ca6fabf529562acd7f to your computer and use it in GitHub Desktop.
Parser
package io.github.rmuhamedgaliev;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Command-line tool that collects every file below the {@code txt} directory and extracts the
 * numbered heading lines (for example {@code "1.2 Title"}) of each file into a nested map.
 *
 * @author <a href="mailto:rinat.muhamedgaliev@gmail.com">Rinat Muhamedgaliev</a>
 */
public class Converter {

    /**
     * A heading line: a dotted number such as {@code 1.}, {@code 1.2} or {@code 1.2.3}, at least
     * one space, then the heading text. Compiled once instead of once per input line.
     * Note: the Cyrillic ranges used here do not include the letters U+0451 / U+0401.
     */
    private static final Pattern HEADING_LINE = Pattern.compile("^(\\d{1,5}\\.(\\d{1,5})?){1,10}( +)([а-яА-Яa-zA-Z0-9_,:\\(\\)\\[\\].\\- ]*)$");

    /** Files discovered by {@link #listFilesForFolder(File)}. */
    private static final List<File> files = new ArrayList<>();

    /**
     * Scans the {@code txt} directory, parses every file found and prints {@code finish} after
     * each one. The parsed structure is currently not used any further.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; I/O problems are reported on stderr
     */
    public static void main(String[] args) throws IOException {
        listFilesForFolder(Paths.get("txt").toFile());
        for (File file : files) {
            readTextFromFile(file);
            System.out.println("finish");
        }
    }

    /**
     * Recursively adds every regular file below {@code folder} to {@link #files}.
     *
     * @param folder directory to scan; a missing or unreadable directory is reported and skipped
     */
    private static void listFilesForFolder(final File folder) {
        // File.listFiles() returns null (not an empty array) when the path is not a directory
        // or cannot be read, so it must be checked before iterating.
        final File[] entries = folder == null ? null : folder.listFiles();
        if (entries == null) {
            System.err.println("Cannot list directory: " + folder);
            return;
        }
        for (final File entry : entries) {
            if (entry.isDirectory()) {
                listFilesForFolder(entry);
            } else {
                files.add(entry);
            }
        }
    }

    /**
     * Reads {@code file} and returns its headings grouped by {@link #filterNumbers(Map)}.
     *
     * @param file text file to parse
     * @return the grouped headings, or an empty map when the file cannot be read
     */
    private static Map<String, Object> readTextFromFile(File file) {
        try {
            return filterNumbers(parseLines(Files.readAllLines(file.toPath())));
        } catch (IOException e) {
            // Best effort: report the unreadable file and let the caller continue with the rest.
            System.err.println("Cannot read " + file + ": " + e);
            return new TreeMap<>();
        }
    }

    /**
     * Returns the part of {@code text} before its first space.
     *
     * @param text a matched heading line
     * @return the leading number token
     */
    private static String extractNumberFromText(String text) {
        return text.split(" ")[0];
    }

    /**
     * Groups headings under their parent number.
     *
     * <p>A number is a parent when at least one other number starts with {@code number + "."}.
     * In the returned map a parent is mapped to a sorted map that holds the parent's own text
     * under its own number plus its descendants, minus those descendants that already appear in
     * the group of a descendant that is a parent itself. Numbers that are neither a parent nor a
     * descendant are mapped directly to their text. The argument is not modified.
     *
     * @param numbers heading text keyed by heading number; may be {@code null}
     * @return a sorted map whose values are either a {@code String} or a {@code TreeMap} group
     */
    private static Map<String, Object> filterNumbers(Map<String, String> numbers) {
        final Map<String, Object> result = new TreeMap<>();
        if (numbers == null || numbers.isEmpty()) {
            return result;
        }

        final Set<String> grouped = new HashSet<>();
        final Map<String, String> headers = new HashMap<>();

        // Pass 1: collect, for every number, all numbers nested below it.
        for (String number : numbers.keySet()) {
            final String prefix = number + ".";
            final Map<String, String> group = new TreeMap<>();
            for (Map.Entry<String, String> candidate : numbers.entrySet()) {
                if (candidate.getKey().startsWith(prefix)) {
                    group.put(candidate.getKey(), candidate.getValue());
                }
            }
            if (!group.isEmpty()) {
                grouped.addAll(group.keySet());
                grouped.add(number);
                headers.put(number, numbers.get(number));
                result.put(number, group);
            }
        }

        // Headings that are neither a parent nor nested below one stay plain text entries.
        for (Map.Entry<String, String> entry : numbers.entrySet()) {
            if (!grouped.contains(entry.getKey())) {
                result.put(entry.getKey(), entry.getValue());
            }
        }

        // Pass 2: drop from each group the entries already listed in a nested parent's group.
        // Must finish before pass 3, which adds each parent's own number to its group.
        for (Object value : result.values()) {
            if (!(value instanceof TreeMap)) {
                continue;
            }
            final Map<String, String> group = asGroup(value);
            final Set<String> coveredDeeper = new HashSet<>();
            for (String child : group.keySet()) {
                final Object childValue = result.get(child);
                if (childValue instanceof TreeMap) {
                    coveredDeeper.addAll(asGroup(childValue).keySet());
                }
            }
            group.keySet().removeAll(coveredDeeper);
        }

        // Pass 3: store each parent's own heading text inside its group.
        for (Map.Entry<String, Object> entry : result.entrySet()) {
            if (entry.getValue() instanceof TreeMap) {
                asGroup(entry.getValue()).put(entry.getKey(), headers.get(entry.getKey()));
            }
        }
        return result;
    }

    /** Casts a group value stored by {@link #filterNumbers(Map)} back to its map type. */
    @SuppressWarnings("unchecked") // filterNumbers only ever stores TreeMap<String, String> groups
    private static Map<String, String> asGroup(Object value) {
        return (Map<String, String>) value;
    }

    /**
     * Extracts heading lines from {@code lines}.
     *
     * <p>Empty lines and lines containing {@code ".."} after the first character are ignored.
     * For each remaining line that matches the heading pattern, the number token becomes the key
     * and the rest of the line, starting at the first space, becomes the value. A number that
     * occurs more than once keeps the text of its last occurrence.
     *
     * @param lines the lines of one file
     * @return heading text keyed by heading number
     */
    private static Map<String, String> parseLines(List<String> lines) {
        final Map<String, String> paragraphs = new HashMap<>();
        for (String line : lines) {
            if (line == null || line.isEmpty() || line.indexOf("..") > 0) {
                continue;
            }
            final Matcher matcher = HEADING_LINE.matcher(line);
            if (matcher.find()) {
                final String heading = matcher.group();
                paragraphs.put(
                        extractNumberFromText(heading),
                        heading.substring(heading.indexOf(" "))
                );
            }
        }
        return paragraphs;
    }

    /**
     * Writes {@code content} to {@code out/<filname>}, replacing an existing file.
     *
     * <p>The {@code out} directory is created when missing and the text is written as UTF-8,
     * the same charset {@link Files#readAllLines(java.nio.file.Path)} uses for the input.
     * Failures are reported on stderr and do not propagate.
     *
     * @param filname name of the file inside {@code out}
     * @param content text to write
     */
    private static void writeTOFile(String filname, String content) {
        try {
            Files.createDirectories(Paths.get("out"));
            try (BufferedWriter writer = Files.newBufferedWriter(Paths.get("out", filname))) {
                writer.write(content);
            }
        } catch (IOException e) {
            System.err.println("Cannot write out/" + filname + ": " + e);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment