Skip to content

Instantly share code, notes, and snippets.

@kishida
Created February 26, 2012 19:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kishida/1918496 to your computer and use it in GitHub Desktop.
Save kishida/1918496 to your computer and use it in GitHub Desktop.
Amazonランキングを1時間ごとに取得するJavaコード
/*
* Amazonランキングを1時間ごとに取得する。
* NekoHTMLが必要。
*/
package amazonrank;
import java.io.*;
import java.net.URL;
import java.util.Date;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Fetches the Amazon sales rank of a fixed list of product pages once an hour,
 * printing each result to standard output and appending it to a text file.
 *
 * <p>Requires NekoHTML ({@code org.cyberneko.html.parsers.DOMParser}) to parse
 * the product pages into a DOM.
 *
 * @author naoki
 */
public class AmazonWatcher {

    /** Amazon product page URLs whose rank is polled. */
    private static final String[] URLS = {
        "http://www.amazon.co.jp/dp/4839914826/",
        "http://www.amazon.co.jp/dp/4777515486/",
        "http://www.amazon.co.jp/dp/4839926816/",
        "http://www.amazon.co.jp/dp/4839932530/",
    };

    /** Name of the file that results are appended to. */
    private static final String OUT_FILE = "amazon.txt";

    /** Matches the rank number (digits and commas) that precedes the "位" suffix. */
    private static final Pattern RANK_PATTERN = Pattern.compile("([0-9,]+)位");

    /**
     * Starts a single-threaded scheduler that polls every URL immediately and
     * then once per hour. The scheduler is never shut down, so the process
     * keeps running until it is killed.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws IOException, SAXException, XPathExpressionException {
        ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor();
        exec.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                pollAll();
            }
        }, 0, 1, TimeUnit.HOURS);
    }

    /**
     * Polls every configured URL once, appending one line per URL to the output file.
     *
     * <p>Never throws: an exception escaping a task passed to
     * {@code scheduleAtFixedRate} suppresses all subsequent executions, so every
     * failure is reported on standard error instead.
     */
    private static void pollAll() {
        PrintWriter pw = null;
        try {
            pw = new PrintWriter(new FileWriter(OUT_FILE, true));
            for (String url : URLS) {
                // Handle failures per URL so one bad page does not skip the rest.
                try {
                    String result = getAmazonRank(url, new Date());
                    System.out.println(result);
                    pw.println(result);
                } catch (Exception e) {
                    System.err.println("Failed to get rank for " + url + ": " + e);
                }
            }
            // PrintWriter swallows IOExceptions; checkError() flushes and reports them.
            if (pw.checkError()) {
                System.err.println("Failed to write to " + OUT_FILE);
            }
        } catch (Exception e) {
            System.err.println("Failed to poll Amazon ranks: " + e);
        } finally {
            // Close on every run; otherwise buffered lines may never reach the
            // file and one file handle leaks per hour.
            if (pw != null) {
                pw.close();
            }
        }
    }

    /**
     * Downloads one product page and builds a result line from it.
     *
     * @param url   product page URL; the page is decoded as Shift_JIS
     * @param today timestamp written at the start of the result line
     * @return a line of the form {@code "<timestamp> <url> <rank> <title>"}, where
     *         rank is the literal {@code "error"} if no rank could be found
     * @throws IOException if the page cannot be opened or read
     * @throws SAXException if the HTML parser fails
     * @throws XPathExpressionException if an XPath expression cannot be evaluated
     */
    private static String getAmazonRank(String url, Date today) throws IOException, SAXException, XPathExpressionException {
        Document doc = fetchDocument(url);
        XPath xp = XPathFactory.newInstance().newXPath();

        // Text content of the sales-rank list item; the rank number is extracted from it.
        String rank = extractRank(xp.evaluate("//LI[@id='SalesRank']", doc));
        // The product image's alt text is used as the title.
        String title = xp.evaluate("//IMG[@id='prodImage']/@alt", doc);

        // NOTE(review): %th formats the abbreviated, locale-specific month name
        // (same as %tb), not a numeric month (%tm) -- confirm this is intended.
        return String.format("%tY/%<th/%<td %<tH:%<tM:%<tS %s %s %s", today, url, rank, title);
    }

    /** Downloads {@code url} and parses it into a DOM with namespaces disabled. */
    private static Document fetchDocument(String url) throws IOException, SAXException {
        InputStream is = new URL(url).openStream();
        try {
            DOMParser parser = new DOMParser();
            parser.setFeature("http://xml.org/sax/features/namespaces", false);
            parser.parse(new InputSource(new InputStreamReader(is, "Shift_JIS")));
            return parser.getDocument();
        } finally {
            is.close();
        }
    }

    /** Returns the first rank number found in {@code rankSection}, or {@code "error"} if none. */
    private static String extractRank(String rankSection) {
        Matcher m = RANK_PATTERN.matcher(rankSection);
        return m.find() ? m.group(1) : "error";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment