Skip to content

Instantly share code, notes, and snippets.

@arcatdmz
Last active December 28, 2015 11:58
Show Gist options
  • Save arcatdmz/7496840 to your computer and use it in GitHub Desktop.
Save arcatdmz/7496840 to your computer and use it in GitHub Desktop.
Look for the publication year of the paper. (Revised to connect to Google Scholar.)
package evernote;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Looks up the publication year of a paper by searching Google Scholar for the
 * paper's exact (quoted) title and scraping the first number that appears in a
 * result's {@code gs_a} byline immediately before the " - host" suffix.
 *
 * <p>Side effect: every lookup that reaches the server (re)writes the part of
 * the response read so far to {@code download.html} in the working directory.
 */
public class PublicationYearSearch {

    /** Returned when no publication year could be determined. */
    private static final int YEAR_NOT_FOUND = -1;

    /** File that receives a copy of the lines read from the response. */
    private static final String DUMP_FILE = "download.html";

    /** Connect/read timeout, so a stalled server cannot block the caller forever. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Browser-like User-Agent sent with the query. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.44 Safari/534.7";

    /**
     * Matches a result byline and captures the digit run that directly precedes
     * the trailing {@code " - host</div>"}. Compiled once instead of per call.
     */
    private static final Pattern YEAR_PATTERN =
            Pattern.compile("<div class=\"gs_a\">.+?([0-9]+) - [a-zA-Z0-9.]+</div>");

    /**
     * Command-line entry point: prints the publication year of the paper whose
     * title is {@code args[0]} to standard output (without a trailing newline).
     * Exits with status 1 when the year could not be found or when no title
     * was supplied.
     *
     * @param args command-line arguments; the first one is the paper title
     */
    public static void main(String[] args) {
        // Previously a missing argument crashed with ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: PublicationYearSearch <paper title>");
            System.exit(1);
            return;
        }
        int year = getPublicationYear(args[0]);
        System.out.print(year);
        if (year < 0) {
            System.exit(1);
        }
    }

    /**
     * Queries Google Scholar for the given title and returns the first
     * publication year found in the result page.
     *
     * <p>I/O failures (network errors, timeouts, failure to write
     * {@code download.html}) are reported on standard error and yield -1;
     * no exception is propagated to the caller.
     *
     * @param title title of the paper to look up; it is searched as an exact phrase
     * @return the publication year, or -1 if {@code title} is {@code null} or
     *         blank, if the lookup failed, or if no year was found
     */
    public static int getPublicationYear(String title) {
        // Nothing to search for: answer without touching the network.
        if (title == null || title.trim().length() == 0) {
            return YEAR_NOT_FOUND;
        }

        int year = YEAR_NOT_FOUND;
        HttpURLConnection conn = null;
        BufferedReader reader = null;
        BufferedWriter writer = null;
        try {
            // Build the query URL; %22 is a double quote, forcing a phrase search.
            String urlString = "http://scholar.google.com/scholar?hl=en"
                    + "&q=%22" + URLEncoder.encode(title, "UTF-8") + "%22";

            // Open the HTTP connection.
            conn = (HttpURLConnection) new URL(urlString).openConnection();
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.setRequestProperty("User-Agent", USER_AGENT);
            conn.setRequestProperty("Referer", urlString);
            conn.connect();

            // NOTE(review): both the reader and the dump file use the platform
            // default charset; confirm which charset the server actually sends.
            reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            writer = new BufferedWriter(new FileWriter(DUMP_FILE));

            // Copy the response line by line, stopping at the first line that
            // yields a year (so the dump is truncated at that line).
            String lineSeparator = System.getProperty("line.separator");
            String line;
            while (year == YEAR_NOT_FOUND && (line = reader.readLine()) != null) {
                writer.write(line);
                writer.write(lineSeparator);
                year = extractYear(line);
            }
        } catch (IOException e) {
            // Also covers UnsupportedEncodingException and MalformedURLException,
            // which are IOException subclasses and were handled identically.
            e.printStackTrace();
        } finally {
            // Close each resource independently so one failure cannot leak the
            // others, and release the streams before dropping the connection.
            closeAndReport(writer, DUMP_FILE);
            closeAndReport(reader, "HTTP response stream");
            if (conn != null) {
                conn.disconnect();
            }
        }
        return year;
    }

    /**
     * Extracts a publication year from one line of the result page.
     *
     * <p>Each match of the byline pattern is tried in order; a captured digit
     * run that does not fit in an {@code int} is skipped. No range check is
     * applied to the number.
     *
     * @param line one line of HTML from the search result page
     * @return the first parseable captured number, or -1 if the line has none
     */
    static int extractYear(String line) {
        Matcher matcher = YEAR_PATTERN.matcher(line);
        while (matcher.find()) {
            try {
                return Integer.parseInt(matcher.group(1));
            } catch (NumberFormatException ignored) {
                // Digit run too long for an int; try the next match on this line.
            }
        }
        return YEAR_NOT_FOUND;
    }

    /**
     * Closes {@code resource} if it is non-null, reporting (not propagating) any
     * failure so that cleanup of the remaining resources still runs.
     */
    private static void closeAndReport(java.io.Closeable resource, String description) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.err.println("Failed to close " + description + ": " + e);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment