Last active
December 28, 2015 11:58
-
-
Save arcatdmz/7496840 to your computer and use it in GitHub Desktop.
Look for the publication year of the paper. (Revised to connect to Google Scholar.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package evernote; | |
import java.io.BufferedReader; | |
import java.io.BufferedWriter; | |
import java.io.FileWriter; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.io.UnsupportedEncodingException; | |
import java.net.HttpURLConnection; | |
import java.net.MalformedURLException; | |
import java.net.URL; | |
import java.net.URLEncoder; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
/**
 * Command-line utility that looks up the publication year of a paper by
 * querying Google Scholar for the exact (quoted) title and scanning the
 * returned HTML for the first year-like number.
 */
public class PublicationYearSearch {

    /**
     * Google Scholar search endpoint. HTTPS is used on purpose:
     * {@link HttpURLConnection} does not follow a redirect that switches
     * protocol, so a plain-HTTP request that gets redirected to HTTPS would
     * yield the redirect stub instead of the result page.
     */
    private static final String SCHOLAR_QUERY_PREFIX = "https://scholar.google.com/scholar?hl=en";

    /**
     * Matches a {@code gs_a} div (presumably the author/venue byline of a
     * search result - confirm against current Scholar markup) that ends in
     * {@code "<digits> - <site></div>"}; group 1 captures the digits.
     * Compiled once because {@link Pattern} instances are immutable.
     */
    private static final Pattern YEAR_PATTERN =
            Pattern.compile("<div class=\"gs_a\">.+?([0-9]+) - [a-zA-Z0-9.]+</div>");

    /** Upper bound for establishing the connection, so a dead host cannot hang the tool. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Upper bound for each blocking read of the response. */
    private static final int READ_TIMEOUT_MS = 30000;

    /**
     * Prints the publication year of the paper whose title is {@code args[0]}
     * to standard output (without a trailing newline). Exits with status 1
     * when the title is missing or no year could be determined; in the latter
     * case {@code -1} is still printed.
     *
     * @param args command-line arguments; the first one is the paper title
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            // Keep stdout clean for callers that capture the year.
            System.err.println("Usage: java evernote.PublicationYearSearch <paper title>");
            System.exit(1);
            return;
        }
        int year = getPublicationYear(args[0]);
        System.out.print(year);
        if (year < 0) {
            System.exit(1);
        }
    }

    /**
     * Queries Google Scholar for the quoted title and returns the first year
     * found in the response.
     *
     * <p>Side effect: every line read from the response is also written to
     * {@code download.html} in the current working directory (overwriting any
     * existing file). Reading stops at the first line that yields a year, so
     * the dump may be a prefix of the page.
     *
     * @param title paper title to search for
     * @return the year parsed from the first matching line, or {@code -1} if
     *         no line matched or an I/O error occurred (the error's stack
     *         trace is printed to standard error)
     */
    public static int getPublicationYear(String title) {
        int year = -1;
        HttpURLConnection conn = null;
        BufferedReader reader = null;
        BufferedWriter writer = null;
        try {
            // Build query URL; %22 wraps the title in double quotes for an exact-phrase search.
            String urlString = SCHOLAR_QUERY_PREFIX
                    + "&q=%22" + URLEncoder.encode(title, "UTF-8") + "%22";

            // Open HTTP connection with browser-like headers.
            URL url = new URL(urlString);
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);
            conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.44 Safari/534.7");
            conn.setRequestProperty("Referer", urlString);
            conn.connect();
            reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream()));

            // Copy the response to disk line by line while scanning for a year.
            String br = System.getProperty("line.separator");
            writer = new BufferedWriter(new FileWriter("download.html"));
            String line;
            while (year < 0 && (line = reader.readLine()) != null) {
                writer.write(line);
                writer.write(br);
                year = extractYear(line);
            }
        } catch (IOException e) {
            // Also covers UnsupportedEncodingException and MalformedURLException,
            // which are IOException subclasses.
            e.printStackTrace();
        } finally {
            // Release each resource independently so one failing close()
            // cannot leak the others; disconnect last, after the stream is closed.
            closeQuietly(reader);
            closeQuietly(writer);
            if (conn != null) {
                conn.disconnect();
            }
        }
        return year;
    }

    /**
     * Extracts a year from a single line of the result page.
     * Package-private so the parsing can be tested without network access.
     *
     * @param line one line of HTML
     * @return the first captured number on the line that fits in an
     *         {@code int}, or {@code -1} if the line has no usable match
     */
    static int extractYear(String line) {
        Matcher matcher = YEAR_PATTERN.matcher(line);
        while (matcher.find()) {
            try {
                return Integer.parseInt(matcher.group(1));
            } catch (NumberFormatException nfe) {
                // Digit run too long for an int; try the next match on this line.
            }
        }
        return -1;
    }

    /**
     * Closes a resource, reporting (but not propagating) a failure so that
     * cleanup of the remaining resources still runs.
     *
     * @param resource resource to close; may be {@code null}
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.err.println("Failed to close resource: " + e);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment