Skip to content

Instantly share code, notes, and snippets.

@tkhm
Last active February 26, 2017 11:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tkhm/22292e702e6753788afbee5c9649b251 to your computer and use it in GitHub Desktop.
Save tkhm/22292e702e6753788afbee5c9649b251 to your computer and use it in GitHub Desktop.
Remove consecutive duplicate rows, comparing only columns 1 and 2.
#!/bin/bash
# Copy the comma-separated file $1 to $2, dropping every row whose first two
# columns both equal those of the row directly before it. Only adjacent
# duplicates are removed; the input is not sorted or otherwise reordered.
#
# Usage: <script> INPUT OUTPUT
#
# This variant performs the comparison inside awk, one awk process per row.
# The start/end timestamps are printed so the run can be timed.
if [ "$#" -lt 2 ]; then
  echo "usage: $0 INPUT OUTPUT" >&2
  exit 1
fi
rm -f -- "$2"
# have_prev stays 0 until the first row has been handled, so the first row is
# always kept even when its key columns are empty.
have_prev=0
prev_id=""
prev_date=""
echo "start $(date)"
# IFS= and -r keep leading/trailing whitespace and backslashes intact; the
# `|| [ -n ... ]` clause still processes a last row without a trailing newline.
while IFS= read -r line || [ -n "${line}" ]
do
  # The previous keys travel through the environment because `awk -v` would
  # expand backslash escapes in them. Appending "" forces a string comparison,
  # so numeric-looking keys such as 1 and 01 are not treated as equal.
  # printf is used instead of echo so rows like "-n" are passed through as data.
  printf '%s\n' "${line}" \
    | HAVE_PREV="${have_prev}" PREV_ID="${prev_id}" PREV_DATE="${prev_date}" awk -F, '
{if (ENVIRON["HAVE_PREV"] == "0" || ($1 "") != (ENVIRON["PREV_ID"] "") || ($2 "") != (ENVIRON["PREV_DATE"] "")) print $0}
' >> "$2"
  prev_id=$(printf '%s\n' "${line}" | awk -F, '{print $1}')
  prev_date=$(printf '%s\n' "${line}" | awk -F, '{print $2}')
  have_prev=1
done < "$1"
echo "end $(date)"
#!/bin/bash
# Copy the comma-separated file $1 to $2, dropping every row whose first two
# columns both equal those of the row directly before it. Only adjacent
# duplicates are removed; the input is not sorted or otherwise reordered.
#
# Usage: <script> INPUT OUTPUT
#
# This variant extracts the key columns with awk and compares them in the
# shell. The start/end timestamps are printed so the run can be timed.
if [ "$#" -lt 2 ]; then
  echo "usage: $0 INPUT OUTPUT" >&2
  exit 1
fi
rm -f -- "$2"
# An explicit flag replaces a sentinel key value, so no real key (not even the
# literal text "init") can collide with the "no previous row yet" state.
have_prev=0
prev_id=""
prev_date=""
echo "start $(date)"
# IFS= and -r keep leading/trailing whitespace and backslashes intact; the
# `|| [ -n ... ]` clause still processes a last row without a trailing newline.
while IFS= read -r line || [ -n "${line}" ]
do
  # printf is used instead of echo so rows like "-n" are passed through as data.
  current_id=$(printf '%s\n' "${line}" | awk -F, '{print $1}')
  current_date=$(printf '%s\n' "${line}" | awk -F, '{print $2}')
  # Keep the row when it is the first one or when either key column changed.
  if [ "${have_prev}" -eq 0 ] \
    || [ "${current_date}" != "${prev_date}" ] \
    || [ "${current_id}" != "${prev_id}" ]; then
    printf '%s\n' "${line}" >> "$2"
  fi
  prev_id="${current_id}"
  prev_date="${current_date}"
  have_prev=1
done < "$1"
echo "end $(date)"
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
/**
 * Copies a comma-separated text file while collapsing runs of adjacent rows
 * that share the same first two columns.
 */
public class FilePrinter {

    /**
     * Reads {@code inputPath} line by line as UTF-8 and writes to
     * {@code outputPath} (UTF-8, {@code "\n"} line endings) every line whose
     * first two comma-separated columns differ from those of the line directly
     * before it. The first line is always written. Only adjacent duplicates are
     * removed; the input is not sorted.
     *
     * <p>A line with fewer than two columns (including a blank line) is treated
     * as having an empty second column.
     *
     * <p>I/O failures, including a missing input file, are not propagated: the
     * stack trace is printed to standard error and the method returns.
     *
     * @param inputPath  path of the file to read
     * @param outputPath path of the file to create or overwrite
     */
    public void distinctLine(String inputPath, String outputPath) {
        String delimiter = ",";
        // Explicit UTF-8 so multi-byte text does not depend on the platform default charset.
        // Every resource declared here is closed automatically, in reverse order,
        // when the try block exits, so no explicit close() calls are needed.
        try (FileInputStream fi = new FileInputStream(inputPath);
             InputStreamReader ir = new InputStreamReader(fi, "UTF-8");
             BufferedReader br = new BufferedReader(ir);
             FileOutputStream fo = new FileOutputStream(outputPath);
             OutputStreamWriter ow = new OutputStreamWriter(fo, "UTF-8");
             BufferedWriter bw = new BufferedWriter(ow)) {
            // null means "no previous row yet"; it can never equal a real key,
            // so the first row is kept even when its key columns are empty.
            String prevId = null;
            String prevDate = null;
            String line;
            while ((line = br.readLine()) != null) {
                // Limit -1 keeps trailing empty columns, so "a," yields an empty second column.
                String[] columns = line.split(delimiter, -1);
                String currentId = columns[0];
                // Rows without a delimiter have a single column; use "" instead of failing.
                String currentDate = columns.length > 1 ? columns[1] : "";
                if (!currentDate.equals(prevDate) || !currentId.equals(prevId)) {
                    bw.write(line);
                    bw.write('\n');
                }
                prevId = currentId;
                prevDate = currentDate;
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is a subclass of IOException.
            e.printStackTrace();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment