Skip to content

Instantly share code, notes, and snippets.

@youtalk
Created February 24, 2012 12:20
Show Gist options
  • Save youtalk/1900624 to your computer and use it in GitHub Desktop.
Save youtalk/1900624 to your computer and use it in GitHub Desktop.
Web search using Yahoo Web Search API
package jp.youtalk;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.text.ParseException;
import java.util.LinkedList;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Small client for the Yahoo! JAPAN web search endpoint configured in {@link #url}.
 *
 * <p>It issues a search for {@code "<query>+Wikipedia"}, parses the XML response and
 * returns a cleaned-up first sentence of every {@code ResultSet/Result/Summary} element.
 *
 * <p>The shared {@link XPath} instance is only used from the {@code synchronized}
 * public search method, so one instance may be shared between threads.
 */
public class YahooWebSearchAPI {
    /** Number of times a failing search is attempted before giving up. */
    private static final int MAX_ATTEMPTS = 3;

    /** Connect and read timeout for the HTTP request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Ideographic full stop (U+3002), the Japanese sentence terminator. Written as a
     * Unicode escape so the source compiles identically under any source encoding.
     */
    private static final String SENTENCE_END = "\u3002";

    private final String appid;
    private final int size;
    private final String url = "http://search.yahooapis.jp/WebSearchService/V2/webSearch";
    private final String charset = "UTF-8";
    private final XPath xpath = XPathFactory.newInstance().newXPath();

    /**
     * @param appid application id sent as the {@code appid} request parameter
     * @param size  value sent as the {@code results} request parameter
     */
    public YahooWebSearchAPI(final String appid, final int size) {
        this.appid = appid;
        this.size = size;
    }

    /**
     * Searches for {@code query} combined with the keyword "Wikipedia" and returns one
     * cleaned summary per search result (see {@link #cleanSummary(String)}).
     *
     * <p>A failing request is retried, but only up to {@link #MAX_ATTEMPTS} times in
     * total; the previous unbounded recursive retry could never terminate on a
     * permanent failure. Failures are reported on {@code System.err}.
     *
     * @param query search terms
     * @return the cleaned summaries in result order; an empty list if every attempt failed
     */
    public synchronized List<String> searchWikipediaSummary(final String query) {
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                return fetchSummaries(query);
            } catch (Exception e) {
                // Boundary catch: this method has never thrown to its callers, so every
                // failure is reported and retried (a bounded number of times) instead.
                System.err.println("Search attempt " + attempt + " of " + MAX_ATTEMPTS + " failed");
                e.printStackTrace();
            }
        }
        return new LinkedList<String>();
    }

    /** Demo entry point: prints the summaries found for a sample query. */
    public static void main(final String[] args) {
        YahooWebSearchAPI api = new YahooWebSearchAPI("appid here", 5);
        // "\u5948\u826f" is the two-character name of the city of Nara.
        List<String> results = api.searchWikipediaSummary("\u5948\u826f");
        for (String r : results) {
            System.out.println(r);
        }
    }

    /**
     * Normalizes one raw summary string:
     * <ol>
     *   <li>removes every {@code "(" ... ")"} span (used for rubies, i.e. readings),
     *       pairing each {@code "("} with the first {@code ")"} that follows it;
     *       an unmatched parenthesis is left untouched and nesting is not balanced;</li>
     *   <li>keeps only the text up to and including the first ideographic full stop,
     *       if there is one;</li>
     *   <li>trims the result and removes all ASCII space characters.</li>
     * </ol>
     *
     * <p>Package-private and static so it can be unit-tested without network access.
     *
     * @param summary raw summary text, not {@code null}
     * @return the cleaned summary
     */
    static String cleanSummary(final String summary) {
        StringBuilder text = new StringBuilder(summary);
        int open = text.indexOf("(");
        while (open != -1) {
            int close = text.indexOf(")", open + 1);
            if (close == -1) {
                break; // no closing parenthesis after this one: nothing left to remove
            }
            text.delete(open, close + 1);
            open = text.indexOf("(", open);
        }

        String cleaned = text.toString();
        int sentenceEnd = cleaned.indexOf(SENTENCE_END);
        if (sentenceEnd != -1) {
            cleaned = cleaned.substring(0, sentenceEnd + 1);
        }
        return cleaned.trim().replace(" ", "");
    }

    /** Performs one search round trip and converts every result into a cleaned summary. */
    private List<String> fetchSummaries(final String query)
            throws IOException, SAXException, ParserConfigurationException,
                   XPathExpressionException {
        List<String> results = new LinkedList<String>();
        String xml = search(new URL(makeQuery(query + "+Wikipedia")));
        Document doc = xmlToDocument(xml);
        int resultCount = size(doc, "ResultSet/Result");
        for (int i = 1; i <= resultCount; i++) { // XPath positions are 1-based
            results.add(cleanSummary(value(doc, "ResultSet/Result[" + i + "]/Summary")));
        }
        return results;
    }

    /** Builds the request URL; only the query text is URL-encoded. */
    private String makeQuery(final String sentence)
            throws UnsupportedEncodingException {
        return url + "?appid=" + appid
                + "&query=" + URLEncoder.encode(sentence, charset) + "&results=" + size;
    }

    /** Returns the number of nodes selected by {@code expression} in {@code doc}. */
    private int size(final Document doc, final String expression)
            throws XPathExpressionException {
        NodeList list = (NodeList) xpath.evaluate(expression, doc, XPathConstants.NODESET);
        return list.getLength();
    }

    /** Evaluates {@code expression} against {@code doc} and returns the result as a string. */
    private String value(final Document doc, final String expression)
            throws XPathExpressionException {
        return xpath.evaluate(expression, doc);
    }

    /**
     * Parses {@code xml} into a DOM document. Because the text comes from the network,
     * resolution of external entities and external DTDs is switched off (XXE hardening).
     */
    private Document xmlToDocument(final String xml)
            throws IOException, SAXException, ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        InputSource source = new InputSource(new StringReader(xml));
        return factory.newDocumentBuilder().parse(source);
    }

    /**
     * Sends a GET request to {@code url} and returns the response body, decoded with
     * {@link #charset}, with every line terminated by {@code "\n"}.
     *
     * <p>The reader and the connection are released even when reading fails, and the
     * request is bounded by {@link #TIMEOUT_MILLIS} for both connecting and reading.
     *
     * @throws IOException if connecting or reading fails
     */
    private String search(final URL url) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.connect();
            InputStream is = connection.getInputStream();
            BufferedReader br = new BufferedReader(new InputStreamReader(is, charset));
            try {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line);
                    sb.append("\n");
                }
                return sb.toString();
            } finally {
                br.close();
            }
        } finally {
            connection.disconnect();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment