Last active
August 29, 2015 14:07
-
-
Save FriedEgg/f33a798db6c01fd3a082 to your computer and use it in GitHub Desktop.
An interface for collecting search results from the unofficial Google Trends API using PROC GROOVY in SAS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** GoogleTrends4SAS | |
* | |
* Description: An interface for collecting search results from the unofficial Google Trends API using PROC GROOVY | |
* | |
* Note: YOU MUST ALTER THE GROOVY CODE MANUALLY. | |
* Find the value of PREF cookie on the machine executing the code and enter it below. | |
* Note: As written, this targets SAS 9.4 on Linux x64; the Groovy versions available via sasjar depend on the SAS version and OS. | |
*/ | |
/* Temporary fileref; PROC GROOVY is later started with classpath=cp. */
filename cp temp;

/* Target file for the Apache Ivy jar, located in the WORK library directory. */
filename ivy "%sysfunc(pathname(work,l))/ivy.jar";

/* Download Ivy 2.3.0-rc1 from Maven Central.                              */
/* Maven Central is only served over HTTPS (repo1.maven.org); the former   */
/* plain-HTTP host central.maven.org is no longer available.               */
proc http
  method = 'get'
  url    = 'https://repo1.maven.org/maven2/org/apache/ivy/ivy/2.3.0-rc1/ivy-2.3.0-rc1.jar'
  out    = ivy
;
run;
/* Start PROC GROOVY using the temporary fileref cp as its classpath. */
proc groovy classpath=cp; | |
/* Add the downloaded Ivy jar to the classpath (presumably needed so the @Grab annotation in the submitted code can resolve its dependency; confirm). */
add classpath=ivy; | |
/* Add the Groovy 2.1.3 jar shipped with SAS; per the header note, the available sasjar versions depend on the SAS version and OS. */
add sasjar="groovy_2.1.3" version="2.1.3.0_SAS_20130517000930"; | |
/* PARSEONLY: NOTE(review) the submitted source is expected to be parsed into classes rather than executed as a script; confirm against the PROC GROOVY documentation. */
submit parseonly; | |
import groovyx.net.http.HTTPBuilder | |
import groovyx.net.http.URIBuilder | |
import groovy.json.JsonSlurper | |
import java.util.regex.Pattern | |
import java.util.regex.Matcher | |
import static groovyx.net.http.ContentType.TEXT | |
import static groovyx.net.http.Method.GET | |
import org.apache.http.impl.cookie.BasicClientCookie | |
/**
 * Client for the unofficial Google Trends fetchComponent endpoint.
 *
 * The PREF cookie value must be filled in by hand before the code is run.
 */
@Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.7')
class GoogleTrendApi {

    /** Captures the JSON object wrapped in the callback-style response text: prefix({...}); */
    private static final Pattern PAYLOAD_PATTERN = Pattern.compile("^[^\\(]+\\((\\{.*\\})\\);\$")

    /** Matches the new Date(y,m,d) expressions in the response, which JsonSlurper cannot parse. */
    private static final String DATE_CALL_REGEX = "(new\\ Date\\((\\d{4}),(\\d{1,2}),(\\d{1,2})\\))"

    /** JSON object substituted for each Date expression (groups 2-4 are year, month, day). */
    private static final String DATE_AS_JSON = "{\"year\": \"\$2\", \"month\": \"\$3\", \"day\": \"\$4\"}"

    /**
     * Queries the time-series component for a search term and region.
     *
     * @param q   search term sent as the q query parameter
     * @param geo region code sent as the geo query parameter
     * @return list of maps, one per table row, with keys date (java.util.Date) and val (Integer)
     * @throws Exception when the HTTP request fails or the response body does not
     *                   contain the expected wrapped JSON payload
     */
    def search(String q, String geo) throws Exception {
        def trends = []

        def uri = new URIBuilder('http://www.google.com/')
        uri.path = 'trends/fetchComponent'
        uri.query = [q: q, geo: geo, cid: "TIMESERIES_GRAPH_0", export: 3]
        def api = new HTTPBuilder(uri.toString())

        // lookup PREF cookie value and enter below
        def prefCookieVal = "<<<YOUR_PREF_COOKIE_VALUE>>>"
        def cookie = new BasicClientCookie('PREF', prefCookieVal)
        cookie.domain = "google.com"
        cookie.path = "/"
        api.client.cookieStore.addCookie(cookie)

        api.request(GET, TEXT) { req ->
            headers.'User-Agent' = "Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0"

            response.failure = { resp ->
                // Dump the response headers to help diagnose the failure, then
                // raise an exception that says what went wrong.
                resp.headers.each { println "${it.name} : ${it.value}" }
                throw new Exception('Google Trends request failed: ' + resp.statusLine)
            }

            response.success = { resp, reader ->
                String body = reader.text
                // Rewrite the Date expressions as JSON objects so the payload is parseable.
                String jsonFriendly = body.replaceAll(DATE_CALL_REGEX, DATE_AS_JSON)
                Matcher m = PAYLOAD_PATTERN.matcher(jsonFriendly)
                if (!m.find()) {
                    throw new Exception('Unexpected Google Trends response: wrapped JSON payload not found')
                }

                def json = new JsonSlurper().parseText(m.group(1))
                json.table.rows.each { row ->
                    def when = row.c[0].v
                    trends << [
                        // java.util.Date(int,int,int) takes the year as an offset from 1900.
                        // The month is passed through unchanged; presumably the feed uses
                        // zero-based months like java.util.Date does (confirm).
                        date : new Date(Integer.parseInt(when.year) - 1900,
                                        Integer.parseInt(when.month),
                                        Integer.parseInt(when.day)),
                        val  : Integer.parseInt(row.c[1].f)
                    ]
                }
            }
        }
        return trends
    }
}
endsubmit; | |
submit parseonly; | |
import java.text.SimpleDateFormat; | |
import java.util.ArrayList; | |
import java.util.Iterator; | |
import java.util.LinkedHashMap; | |
public class GoogleTrends4SAS { | |
public String q = ""; | |
public String geo = ""; | |
public void main() throws Exception { | |
GoogleTrendApi api = new GoogleTrendApi(); | |
trends = ((ArrayList) (api.search(q, geo))); | |
iter = trends.iterator(); | |
} | |
public boolean hasNext() { | |
return iter.hasNext(); | |
} | |
public void getNext() { | |
trend = ((LinkedHashMap) (iter.next())); | |
} | |
public String getDateString() { | |
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); | |
return sdf.format(trend.get("date")); | |
} | |
public int getVal() { | |
return ((int) (trend.get("val"))); | |
} | |
protected ArrayList trends; | |
protected Iterator iter; | |
protected LinkedHashMap trend; | |
} | |
endsubmit; | |
run; | |
/* Point the CLASSPATH environment variable at the physical location of fileref cp, */
/* the classpath that PROC GROOVY was started with.                                 */
options set=classpath "%sysfunc(pathname(cp,f))";

/* Build WORK.GOOGLE_TRENDS with one observation per row returned by GoogleTrends4SAS. */
data google_trends;
  keep date val;
  format date date9. DateString $10.;
  format val comma8.;

  declare javaobj GoogleTrends("GoogleTrends4SAS");
  GoogleTrends.setStringField("q", "sas");
  GoogleTrends.setStringField("geo", "US");

  /* Run the search, then ask whether the Java side raised an exception */
  /* (original author note: usage exceeded).                            */
  rc = GoogleTrends.callVoidMethod("main");
  rc = GoogleTrends.ExceptionCheck(e);

  if (e) then put 'exception occurred';
  else do;
    /* Walk the Java-side cursor: rc holds the hasNext result for each pass. */
    GoogleTrends.callBooleanMethod("hasNext", rc);
    do _n_ = 1 by 1 while (rc);
      GoogleTrends.callVoidMethod("getNext");
      GoogleTrends.callStringMethod("getDateString", DateString);
      date = input(DateString, yymmdd10.);
      GoogleTrends.callIntMethod("getVal", val);
      output;
      GoogleTrends.callBooleanMethod("hasNext", rc);
    end;
  end;

  /* There is no input data set, so end the step explicitly after one pass. */
  stop;
run;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment