Created
April 3, 2012 17:56
-
-
Save belun/2294183 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.google.common.collect.LinkedListMultimap; | |
import com.google.common.collect.Multimap; | |
import org.apache.commons.lang3.StringUtils; | |
import java.io.*; | |
import java.util.*; | |
import java.util.regex.Pattern; | |
public class DataTranslator { | |
public static void main(String[] arguments) { | |
if(arguments.length < 1) { | |
System.out.println("Please provide a file name."); | |
return; | |
} | |
String fileName = arguments[0]; | |
try { | |
DataTranslator dataTranslator = new DataTranslator(); | |
Translations columnTranslations = dataTranslator.readTranslationsFrom(fileName + FileExtension.COLUMNS); | |
Translations identifierTranslations = dataTranslator.readTranslationsFrom(fileName + FileExtension.IDENTIFIERS); | |
RawData rawData = dataTranslator.readRawDataFrom(fileName + FileExtension.RAW_DATA); | |
TranslatedData translatedData = rawData.translateUsing(columnTranslations, identifierTranslations); | |
translatedData.saveToFile(fileName + FileExtension.TRANSLATED_DATA); | |
} catch (Exception exception) { | |
System.out.println("Error!"); | |
} | |
} | |
private interface Index { | |
interface Configuration { | |
short RAW_NAME = 0; | |
short TRANSLATION = 1; | |
} | |
interface RawData { | |
short IDENTIFIER = 0; | |
} | |
} | |
private interface FileExtension { | |
String COLUMNS = ".columns"; | |
String IDENTIFIERS = ".ids"; | |
String RAW_DATA = ".rawdata"; | |
String TRANSLATED_DATA = ".data"; | |
} | |
private static final String TAB = "\t"; | |
private static final Pattern breakerByTab = Pattern.compile(TAB); | |
private static final String NEW_LINE = "\n"; | |
private Translations readTranslationsFrom(String fileName) throws Exception { | |
BufferedReader reader = new BufferedReader(new FileReader(fileName)); | |
String fileLine; | |
Translations translations = new Translations(); | |
while((fileLine = reader.readLine()) != null) { | |
translations.from(fileLine); | |
} | |
return translations; | |
} | |
private class Translations { | |
private final Map<String, String> translations = new HashMap<String, String>(); | |
public void from(String fileLine) { | |
String[] translation = breakerByTab.split(fileLine); | |
translations.put(translation[Index.Configuration.RAW_NAME], | |
translation[Index.Configuration.TRANSLATION]); | |
} | |
public String translate(String rawConfiguration) throws NotTranslationFound { | |
if (translations.containsKey(rawConfiguration)) { | |
return translations.get(rawConfiguration); | |
} | |
throw new NotTranslationFound(); | |
} | |
} | |
private class NotTranslationFound extends RuntimeException { } | |
public RawData readRawDataFrom(String fileName) throws Exception { | |
BufferedReader reader = new BufferedReader(new FileReader(fileName)); | |
String[] columns = breakerByTab.split(reader.readLine()); | |
String fileLine; | |
HeaderlessData headerlessData = new HeaderlessData(); | |
try { | |
while((fileLine = reader.readLine()) != null) { | |
headerlessData.from(fileLine); | |
} | |
} finally { | |
reader.close(); | |
} | |
return new RawData(columns, headerlessData); | |
} | |
private class RawData { | |
private final HeaderlessData headerlessData; | |
private final String[] columns; | |
public RawData(String[] columns, HeaderlessData headerlessData) { | |
this.columns = columns; | |
this.headerlessData = headerlessData; | |
} | |
public TranslatedData translateUsing(Translations columnTranslations, Translations identifiersTranslations) { | |
HeaderlessData translatedHeaderlessData = headerlessData.translateUsing(identifiersTranslations); | |
return new SemiTranslatedData(columns, translatedHeaderlessData).translateUsing(columnTranslations); | |
} | |
} | |
private class HeaderlessData { | |
private final Multimap<String, String> lines; | |
public HeaderlessData() { | |
this(LinkedListMultimap.<String, String>create()); | |
} | |
private HeaderlessData(Multimap<String, String> lines) { | |
this.lines = lines; | |
} | |
public void from(String fileLine) { | |
String[] rawDataLine = breakerByTab.split(fileLine); | |
String identifier = rawDataLine[Index.RawData.IDENTIFIER]; | |
for (int index = Index.RawData.IDENTIFIER + 1; index < rawDataLine.length; index++) { | |
lines.put(identifier, rawDataLine[index]); | |
} | |
} | |
public HeaderlessData translateUsing(Translations identifierTranslations) { | |
Multimap<String, String> translatedLines = LinkedListMultimap.create(); | |
for (String rawIdentifier: lines.keys()) { | |
try { | |
String translatedIdentifier = identifierTranslations.translate(rawIdentifier); | |
Collection<String> lineOfData = lines.get(rawIdentifier); | |
translatedLines.putAll(translatedIdentifier, lineOfData); | |
} catch (NotTranslationFound exception) { | |
System.out.println("Skipping identifier [" + rawIdentifier +"]"); | |
} | |
} | |
return new HeaderlessData(translatedLines); | |
} | |
public HeaderlessData clone() { | |
return new HeaderlessData(LinkedListMultimap.create(lines)); | |
} | |
public void dropColumnNumber(int indexToDrop) { | |
for (String identifier: lines.keys()) { | |
Collection<String> lineOfData = lines.get(identifier); | |
Iterator<String> dataIterator = lineOfData.iterator(); | |
int index = 0; | |
while(index < lineOfData.size()) { | |
dataIterator.next(); | |
if(index == indexToDrop) { | |
dataIterator.remove(); | |
break; | |
} | |
index++; | |
} | |
} | |
} | |
public void saveTo(BufferedWriter writer) throws Exception { | |
for (String identifier: lines.keys()) { | |
Collection<String> lineOfData = lines.get(identifier); | |
writer.write(identifier + TAB + StringUtils.join(lineOfData, TAB) + NEW_LINE); | |
} | |
} | |
} | |
private class SemiTranslatedData { | |
private final HeaderlessData headerlessData; | |
private final String[] columns; | |
public SemiTranslatedData(String[] columns, HeaderlessData headerlessData) { | |
this.columns = columns; | |
this.headerlessData = headerlessData.clone(); | |
} | |
public TranslatedData translateUsing(Translations columnTranslations) { | |
LinkedList<String> translatedColumns = new LinkedList<String>(); | |
for (int index = 0; index < columns.length; index++) { | |
String rawColumn = columns[index]; | |
try { | |
String translatedColumn = columnTranslations.translate(rawColumn); | |
translatedColumns.add(translatedColumn); | |
} catch (NotTranslationFound exception) { | |
System.out.println("Skipping column [" + rawColumn +"]"); | |
headerlessData.dropColumnNumber(index); | |
} | |
} | |
return new TranslatedData((String[]) translatedColumns.toArray(), headerlessData); | |
} | |
} | |
private class TranslatedData { | |
private final HeaderlessData headerlessData; | |
private final String[] columns; | |
public TranslatedData(String[] columns, HeaderlessData headerlessData) { | |
this.columns = columns; | |
this.headerlessData = headerlessData; | |
} | |
public void saveToFile(String fileName) throws Exception { | |
BufferedWriter writer = new BufferedWriter(new FileWriter(fileName)); | |
try { | |
writer.write(StringUtils.join(columns, TAB) + NEW_LINE); | |
headerlessData.saveTo(writer); | |
} finally { | |
writer.close(); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment