Last active
August 29, 2015 14:04
-
-
Save midoribashikk/35ee1d6eb6b34d414b50 to your computer and use it in GitHub Desktop.
Twitterで全ツイート履歴が取得できるようになって楽しくなりましたが、 古いツイートの順番がおかしくなっていませんか? 私の場合、2010年11月4日以前のデータの 時刻が消えて00:00:00になっているようで(日付は無事)、 順序がところどころおかしくなっていました。 これを解消すべく、JSON形式ファイルの中身を タイムスタンプ順ではなくID順に並べ替えるプログラムを殴り書きしました。 <使用例> javac TwSorter.java → java TwSorter 2010_05.js ※元ファイルは、ファイル名の先頭に"~"をつけてコピーします。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Collections;
/**
 * Command-line tool that re-orders the records of one Twitter archive data
 * file (a {@code Grailbird.data.*} {@code .js} file) by tweet ID.
 *
 * <p>Usage: {@code java TwSorter <file>}. The input file is first copied
 * byte-for-byte to a backup whose file name is prefixed with {@code ~}; the
 * input file is then rewritten in place with its records sorted by descending
 * ID (largest ID first). Records with equal IDs keep their relative order.
 *
 * <p>The parser is line-oriented: it relies on the exact line layout used in
 * the file (records separated by a line {@code "}, {"}, the record's own ID on
 * a line indented by two spaces) rather than on real JSON parsing.
 */
public class TwSorter
{
    /** Start of the assignment line that precedes the JSON array. */
    private static final String HEADER_PREFIX = "Grailbird.data.";
    /** Line that opens the array and its first record. */
    private static final String ARRAY_OPEN = " [ {";
    /** Line that closes the last record and the array. */
    private static final String ARRAY_CLOSE = "} ]";
    /** Line that separates two consecutive records. */
    private static final String RECORD_SEPARATOR = "}, {";
    /**
     * Start of the line carrying a record's own ID. The two-space indent is
     * significant: "id" lines of nested objects are indented deeper and must
     * not be mistaken for the record ID.
     */
    private static final String ID_PREFIX = "  \"id\" : ";

    /** One record: its ID plus the raw text lines that make up its body. */
    class Twdata implements Comparable<Twdata>
    {
        private long id;
        private ArrayList<String> datastrs;

        public Twdata()
        {
            this.id = 0;
            this.datastrs = new ArrayList<String>();
        }

        /**
         * Orders records by descending ID.
         *
         * @param data the record to compare against
         * @return a negative value if this record has the larger ID, zero if
         *         the IDs are equal, a positive value otherwise
         */
        @Override
        public int compareTo(Twdata data)
        {
            // Arguments are deliberately swapped to obtain descending order.
            return Long.compare(data.id, this.id);
        }
    }

    private String filename;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path of the file to sort in place
     */
    public static void main(String args[])
    {
        if (args.length < 1)
        {
            System.out.println("Usage: java TwSorter <file>");
            return;
        }
        TwSorter obj = new TwSorter();
        obj.filename = args[0];
        try
        {
            obj.copy();
            obj.make();
        }
        catch (Exception e)
        {
            System.out.println("Exception...orz " + e);
        }
    }

    /**
     * Derives the backup path by prefixing the file-name component with "~".
     *
     * <p>Both '/' and '\' are treated as directory separators so that the
     * backup name always differs from the input name; otherwise creating the
     * backup would truncate the very file being backed up.
     *
     * @param filename path of the file to back up
     * @return the same path with "~" inserted before the file name
     */
    private String getCopyFileName(String filename)
    {
        int nameStart = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\')) + 1;
        return filename.substring(0, nameStart) + "~" + filename.substring(nameStart);
    }

    /**
     * Copies the input file byte-for-byte to its backup path, replacing any
     * existing backup.
     *
     * @throws Exception if the file cannot be read or the backup cannot be written
     */
    private void copy() throws Exception
    {
        Files.copy(Paths.get(filename), Paths.get(getCopyFileName(filename)),
                StandardCopyOption.REPLACE_EXISTING);
    }

    /**
     * Reads the backup file, sorts its records by descending ID and rewrites
     * the input file with the sorted records.
     *
     * <p>The backup is read and parsed completely before the input file is
     * opened for writing, so a read or parse failure leaves the input file
     * untouched.
     *
     * @throws Exception if reading, parsing an ID, or writing fails
     */
    private void make() throws Exception
    {
        ArrayList<Twdata> data = new ArrayList<Twdata>();
        String dataname = "";

        // input
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(getCopyFileName(filename)), StandardCharsets.UTF_8)))
        {
            Twdata work = new Twdata();
            String lineGet;
            while ((lineGet = br.readLine()) != null)
            {
                if (lineGet.length() > HEADER_PREFIX.length() && lineGet.startsWith(HEADER_PREFIX))
                {
                    dataname = lineGet;
                }
                else if (lineGet.equals(RECORD_SEPARATOR))
                {
                    data.add(work);
                    work = new Twdata();
                }
                else if (lineGet.equals(ARRAY_OPEN) || lineGet.equals(ARRAY_CLOSE))
                {
                    // Structural lines are regenerated on output.
                }
                else
                {
                    work.datastrs.add(lineGet);
                    if (lineGet.length() > ID_PREFIX.length() && lineGet.startsWith(ID_PREFIX))
                    {
                        work.id = parseId(lineGet);
                    }
                }
            }
            // The last record is terminated by the array close, not a separator.
            data.add(work);
        }

        // sort
        Collections.sort(data);

        // output
        try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(filename), StandardCharsets.UTF_8)))
        {
            writeLine(bw, dataname);
            writeLine(bw, ARRAY_OPEN);
            for (int i = 0; i < data.size(); i++)
            {
                if (i > 0)
                {
                    writeLine(bw, RECORD_SEPARATOR);
                }
                for (String line : data.get(i).datastrs)
                {
                    writeLine(bw, line);
                }
            }
            writeLine(bw, ARRAY_CLOSE);
        }
    }

    /**
     * Extracts the numeric ID from a line that starts with {@link #ID_PREFIX}.
     * The value ends at the first comma, or at the end of the line when the
     * line has no trailing comma.
     *
     * @param line a line beginning with the ID prefix
     * @return the parsed ID
     * @throws NumberFormatException if the value is not a valid long
     */
    private static long parseId(String line)
    {
        int start = ID_PREFIX.length();
        int end = line.indexOf(',', start);
        String digits = (end < 0) ? line.substring(start) : line.substring(start, end);
        return Long.parseLong(digits.trim());
    }

    /** Writes one line followed by the platform line separator. */
    private static void writeLine(BufferedWriter out, String line) throws java.io.IOException
    {
        out.write(line);
        out.newLine();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment