Last active
February 26, 2017 11:12
-
-
Save tkhm/22292e702e6753788afbee5c9649b251 to your computer and use it in GitHub Desktop.
Keep only the rows whose columns 1 and 2 differ from those of the previous row (consecutive-duplicate removal).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Copies the comma-separated file $1 to $2, dropping every row whose first two
# columns (id, date) both equal those of the row immediately before it.
# This variant performs the comparison with one awk invocation per row.
#
# Usage: <script> INPUT_CSV OUTPUT_CSV
rm -f -- "$2"
echo "start $(date)"
# No previous row exists yet, so the first row must always be emitted, even
# when its first two columns are empty.
is_first=1
prev_id=""
prev_date=""
# IFS= and -r keep surrounding whitespace and backslashes intact; the
# `|| [ -n ... ]` clause still processes a last line without a trailing newline.
while IFS= read -r line || [ -n "${line}" ]
do
  # printf instead of echo: rows such as "-n" must not be parsed as options.
  # The previous values travel through the environment rather than `-v`,
  # because `-v` rewrites backslash escape sequences in the assigned value.
  # Appending "" forces string comparison, so "01" and "1" stay distinct.
  printf '%s\n' "${line}" |
    IS_FIRST="${is_first}" PREV_ID="${prev_id}" PREV_DATE="${prev_date}" awk -F, '
      {
        if (ENVIRON["IS_FIRST"] == 1 ||
            ($1 "") != (ENVIRON["PREV_ID"] "") ||
            ($2 "") != (ENVIRON["PREV_DATE"] ""))
          print $0
      }
    ' >> "$2"
  prev_id=$(printf '%s\n' "${line}" | awk -F, '{print $1}')
  prev_date=$(printf '%s\n' "${line}" | awk -F, '{print $2}')
  is_first=0
done < "$1"
echo "end $(date)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Copies the comma-separated file $1 to $2, dropping every row whose first two
# columns (id, date) both equal those of the row immediately before it.
# This variant performs the comparison in the shell itself.
#
# Usage: <script> INPUT_CSV OUTPUT_CSV
rm -f -- "$2"
# An explicit flag marks "no previous row yet"; a sentinel string such as
# "init" would wrongly drop a first row whose columns really are "init,init".
is_first=1
prev_id=""
prev_date=""
echo "start $(date)"
# IFS= and -r keep surrounding whitespace and backslashes intact; the
# `|| [ -n ... ]` clause still processes a last line without a trailing newline.
while IFS= read -r line || [ -n "${line}" ]
do
  # Split on commas with the read builtin instead of forking awk twice per row.
  # The throwaway third variable absorbs the remaining columns so that
  # current_date holds exactly column 2.
  IFS=, read -r current_id current_date _ <<< "${line}"
  if [[ "${is_first}" -eq 1 || "${current_date}" != "${prev_date}" || "${current_id}" != "${prev_id}" ]]; then
    # printf instead of echo: rows such as "-n" must not be parsed as options.
    printf '%s\n' "${line}" >> "$2"
  fi
  prev_id="${current_id}"
  prev_date="${current_date}"
  is_first=0
done < "$1"
echo "end $(date)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedReader; | |
import java.io.BufferedWriter; | |
import java.io.FileInputStream; | |
import java.io.FileNotFoundException; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.io.OutputStreamWriter; | |
/**
 * Copies a comma-separated text file while dropping rows that repeat the
 * first two columns of the row immediately before them.
 */
public class FilePrinter {

    /**
     * Reads {@code inputPath} line by line as UTF-8 and writes to
     * {@code outputPath} (also UTF-8) every line whose first column (id) or
     * second column (date) differs from that of the preceding line.
     *
     * <p>The first line is always written. A line without a comma is treated
     * as having an empty second column. Each written line is terminated with
     * {@code "\n"}. The output file is opened before reading starts, so it is
     * created (or overwritten) even when the input is empty.
     *
     * <p>I/O failures, including a missing input file, are not propagated:
     * the stack trace is printed and the method returns.
     *
     * @param inputPath  path of the comma-separated file to read
     * @param outputPath path of the file to write the kept lines to
     */
    public void distinctLine(String inputPath, String outputPath) {
        String delimiter = ",";
        // Explicit UTF-8 so multi-byte text does not depend on the platform charset.
        // Every stream has its own resource variable so it is closed automatically,
        // in reverse order, even if constructing a later wrapper fails; no manual
        // close() calls are needed.
        try (FileInputStream fi = new FileInputStream(inputPath);
                InputStreamReader ir = new InputStreamReader(fi, "UTF-8");
                BufferedReader br = new BufferedReader(ir);
                FileOutputStream fo = new FileOutputStream(outputPath);
                OutputStreamWriter ow = new OutputStreamWriter(fo, "UTF-8");
                BufferedWriter bw = new BufferedWriter(ow)) {
            String line;
            // null means "no previous row yet", which never equals a real column
            // value, so the first row is kept even when its columns are empty.
            String prevId = null;
            String prevDate = null;
            while ((line = br.readLine()) != null) {
                // limit -1 keeps trailing empty columns; split always yields >= 1 element
                String[] columns = line.split(delimiter, -1);
                String currentId = columns[0];
                // a row without a delimiter has no second column: treat it as empty
                String currentDate = columns.length > 1 ? columns[1] : "";
                if (!currentDate.equals(prevDate) || !currentId.equals(prevId)) {
                    bw.write(line + "\n");
                }
                prevId = currentId;
                prevDate = currentDate;
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            e.printStackTrace();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment