Skip to content

Instantly share code, notes, and snippets.

@belun
Created April 3, 2012 17:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save belun/2294183 to your computer and use it in GitHub Desktop.
Save belun/2294183 to your computer and use it in GitHub Desktop.
import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Multimap;
import org.apache.commons.lang3.StringUtils;
import java.io.*;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Command-line tool that rewrites a tab-separated data file using two lookup tables.
 *
 * <p>Given a base file name {@code X} it reads:
 * <ul>
 *   <li>{@code X.columns} - column-name translations, one {@code raw<TAB>translation} pair per line;</li>
 *   <li>{@code X.ids} - identifier translations in the same format;</li>
 *   <li>{@code X.rawdata} - a header line of column names followed by data lines whose first
 *       field is the row identifier.</li>
 * </ul>
 * and writes {@code X.data}. Columns and identifiers that have no translation are reported on
 * standard output and left out of the result.
 */
public class DataTranslator {

    /**
     * Entry point.
     *
     * @param arguments {@code arguments[0]} is the base file name (without extension)
     */
    public static void main(String[] arguments) {
        if (arguments.length < 1) {
            System.out.println("Please provide a file name.");
            return;
        }
        String fileName = arguments[0];
        try {
            DataTranslator dataTranslator = new DataTranslator();
            Translations columnTranslations = dataTranslator.readTranslationsFrom(fileName + FileExtension.COLUMNS);
            Translations identifierTranslations = dataTranslator.readTranslationsFrom(fileName + FileExtension.IDENTIFIERS);
            RawData rawData = dataTranslator.readRawDataFrom(fileName + FileExtension.RAW_DATA);
            TranslatedData translatedData = rawData.translateUsing(columnTranslations, identifierTranslations);
            translatedData.saveToFile(fileName + FileExtension.TRANSLATED_DATA);
        } catch (Exception exception) {
            System.out.println("Error!");
            // Do not swallow the failure: say what actually went wrong.
            System.err.println("Cause: " + exception);
        }
    }

    /** Field positions inside the tab-separated input lines. */
    private interface Index {
        /** Positions inside a translation ({@code .columns} / {@code .ids}) line. */
        interface Configuration {
            short RAW_NAME = 0;
            short TRANSLATION = 1;
        }

        /** Positions inside a raw data line. */
        interface RawData {
            short IDENTIFIER = 0;
        }
    }

    /** Extensions appended to the base file name given on the command line. */
    private interface FileExtension {
        String COLUMNS = ".columns";
        String IDENTIFIERS = ".ids";
        String RAW_DATA = ".rawdata";
        String TRANSLATED_DATA = ".data";
    }

    private static final String TAB = "\t";
    private static final Pattern breakerByTab = Pattern.compile(TAB);
    private static final String NEW_LINE = "\n";

    /**
     * Loads a translation table from a file.
     *
     * @param fileName path of the translation file
     * @return the translations found in the file
     * @throws Exception if the file cannot be read or contains a malformed line
     */
    private Translations readTranslationsFrom(String fileName) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            Translations translations = new Translations();
            String fileLine;
            while ((fileLine = reader.readLine()) != null) {
                translations.from(fileLine);
            }
            return translations;
        } finally {
            // Always release the file handle, also when a line fails to parse.
            reader.close();
        }
    }

    /** Lookup table from a raw name to its translation. */
    private static class Translations {
        private final Map<String, String> translations = new HashMap<String, String>();

        /**
         * Registers the translation described by one line of a translation file.
         * Blank lines are ignored.
         *
         * @param fileLine a {@code raw<TAB>translation} line
         * @throws IllegalArgumentException if a non-blank line has no translation field
         */
        public void from(String fileLine) {
            if (fileLine.trim().length() == 0) {
                return;
            }
            String[] translation = breakerByTab.split(fileLine);
            if (translation.length <= Index.Configuration.TRANSLATION) {
                throw new IllegalArgumentException(
                        "Malformed translation line [" + fileLine + "]: expected <raw name><TAB><translation>");
            }
            translations.put(translation[Index.Configuration.RAW_NAME],
                    translation[Index.Configuration.TRANSLATION]);
        }

        /**
         * Looks up the translation of a raw name.
         *
         * @param rawConfiguration the raw name
         * @return its translation
         * @throws NotTranslationFound if the raw name is not in the table
         */
        public String translate(String rawConfiguration) throws NotTranslationFound {
            if (translations.containsKey(rawConfiguration)) {
                return translations.get(rawConfiguration);
            }
            throw new NotTranslationFound();
        }
    }

    /** Signals that a raw name has no entry in a {@link Translations} table. */
    private static class NotTranslationFound extends RuntimeException { }

    /**
     * Reads a raw data file: the first line is the header, every following line is a data row.
     *
     * @param fileName path of the raw data file
     * @return the parsed header and rows
     * @throws Exception if the file cannot be read or has no header line
     */
    public RawData readRawDataFrom(String fileName) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            String headerLine = reader.readLine();
            if (headerLine == null) {
                throw new IOException("Raw data file [" + fileName + "] is empty; expected a header line.");
            }
            String[] columns = breakerByTab.split(headerLine);
            HeaderlessData headerlessData = new HeaderlessData();
            String fileLine;
            while ((fileLine = reader.readLine()) != null) {
                headerlessData.from(fileLine);
            }
            return new RawData(columns, headerlessData);
        } finally {
            reader.close();
        }
    }

    /** Untranslated header plus untranslated rows. */
    private static class RawData {
        private final HeaderlessData headerlessData;
        private final String[] columns;

        public RawData(String[] columns, HeaderlessData headerlessData) {
            this.columns = columns;
            this.headerlessData = headerlessData;
        }

        /**
         * Translates the row identifiers first and the column names second.
         *
         * @param columnTranslations      translations for the header names
         * @param identifiersTranslations translations for the row identifiers
         * @return the fully translated data
         */
        public TranslatedData translateUsing(Translations columnTranslations, Translations identifiersTranslations) {
            HeaderlessData translatedHeaderlessData = headerlessData.translateUsing(identifiersTranslations);
            return new SemiTranslatedData(columns, translatedHeaderlessData).translateUsing(columnTranslations);
        }
    }

    /**
     * The data rows without the header, stored as identifier -> values in file order.
     * Rows sharing an identifier are merged into a single list of values.
     */
    private static class HeaderlessData {
        private final Multimap<String, String> lines;

        public HeaderlessData() {
            this(LinkedListMultimap.<String, String>create());
        }

        private HeaderlessData(Multimap<String, String> lines) {
            this.lines = lines;
        }

        /**
         * Adds one data row: the first field is the identifier, the remaining fields its values.
         *
         * @param fileLine a tab-separated data line
         */
        public void from(String fileLine) {
            String[] rawDataLine = breakerByTab.split(fileLine);
            String identifier = rawDataLine[Index.RawData.IDENTIFIER];
            for (int index = Index.RawData.IDENTIFIER + 1; index < rawDataLine.length; index++) {
                lines.put(identifier, rawDataLine[index]);
            }
        }

        /**
         * Builds a copy whose identifiers are translated; rows whose identifier has no
         * translation are reported on standard output and omitted.
         *
         * @param identifierTranslations translations for the row identifiers
         * @return a new instance keyed by the translated identifiers
         */
        public HeaderlessData translateUsing(Translations identifierTranslations) {
            Multimap<String, String> translatedLines = LinkedListMultimap.create();
            // keySet(), not keys(): keys() repeats a key once per stored value, which would
            // copy every row as many times as it has values.
            for (String rawIdentifier : lines.keySet()) {
                try {
                    String translatedIdentifier = identifierTranslations.translate(rawIdentifier);
                    translatedLines.putAll(translatedIdentifier, lines.get(rawIdentifier));
                } catch (NotTranslationFound exception) {
                    System.out.println("Skipping identifier [" + rawIdentifier +"]");
                }
            }
            return new HeaderlessData(translatedLines);
        }

        /** Returns an independent copy, so that dropping columns does not affect this instance. */
        @Override
        public HeaderlessData clone() {
            return new HeaderlessData(LinkedListMultimap.create(lines));
        }

        /**
         * Removes the value at the given position from every row. Rows that have no value at
         * that position are left untouched.
         *
         * @param indexToDrop zero-based position among a row's values (the identifier is not counted)
         */
        public void dropColumnNumber(int indexToDrop) {
            // Iterate over a snapshot of the identifiers: removing the last value of a row
            // removes its key, which must not happen under a live key-set iterator.
            for (String identifier : new ArrayList<String>(lines.keySet())) {
                Iterator<String> dataIterator = lines.get(identifier).iterator();
                for (int index = 0; dataIterator.hasNext(); index++) {
                    dataIterator.next();
                    if (index == indexToDrop) {
                        dataIterator.remove();
                        break;
                    }
                }
            }
        }

        /**
         * Writes every row as {@code identifier<TAB>value<TAB>value...} followed by a new line.
         *
         * @param writer destination; it is not closed by this method
         * @throws Exception if writing fails
         */
        public void saveTo(BufferedWriter writer) throws Exception {
            // keySet() so that each row is written exactly once.
            for (String identifier : lines.keySet()) {
                Collection<String> lineOfData = lines.get(identifier);
                writer.write(identifier + TAB + StringUtils.join(lineOfData, TAB) + NEW_LINE);
            }
        }
    }

    /** Data whose identifiers are already translated but whose header is still raw. */
    private static class SemiTranslatedData {
        private final HeaderlessData headerlessData;
        private final String[] columns;

        public SemiTranslatedData(String[] columns, HeaderlessData headerlessData) {
            this.columns = columns;
            // Work on a private copy: translateUsing() drops columns from it.
            this.headerlessData = headerlessData.clone();
        }

        /**
         * Translates the header; a column without translation is reported on standard output
         * and removed from both the header and every row.
         *
         * @param columnTranslations translations for the header names
         * @return the fully translated data
         */
        public TranslatedData translateUsing(Translations columnTranslations) {
            List<String> translatedColumns = new ArrayList<String>();
            int droppedSoFar = 0;
            for (int index = 0; index < columns.length; index++) {
                String rawColumn = columns[index];
                try {
                    translatedColumns.add(columnTranslations.translate(rawColumn));
                } catch (NotTranslationFound exception) {
                    System.out.println("Skipping column [" + rawColumn +"]");
                    // Every earlier drop shifted the remaining values one position to the left.
                    // NOTE(review): this maps header position N to value position N, which is
                    // only right if the header line carries no label for the identifier
                    // column - confirm against the real .rawdata layout.
                    headerlessData.dropColumnNumber(index - droppedSoFar);
                    droppedSoFar++;
                }
            }
            // toArray() without an argument returns Object[]; casting that to String[] fails at runtime.
            String[] header = translatedColumns.toArray(new String[translatedColumns.size()]);
            return new TranslatedData(header, headerlessData);
        }
    }

    /** Fully translated header and rows, ready to be written out. */
    private static class TranslatedData {
        private final HeaderlessData headerlessData;
        private final String[] columns;

        public TranslatedData(String[] columns, HeaderlessData headerlessData) {
            this.columns = columns;
            this.headerlessData = headerlessData;
        }

        /**
         * Writes the tab-separated header line followed by all rows.
         *
         * @param fileName path of the file to create or overwrite
         * @throws Exception if the file cannot be written
         */
        public void saveToFile(String fileName) throws Exception {
            BufferedWriter writer = new BufferedWriter(new FileWriter(fileName));
            try {
                writer.write(StringUtils.join(columns, TAB) + NEW_LINE);
                headerlessData.saveTo(writer);
            } finally {
                writer.close();
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment