Skip to content

Instantly share code, notes, and snippets.

@danilo04
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danilo04/6b10cf89f753fb7b89bc to your computer and use it in GitHub Desktop.
Save danilo04/6b10cf89f753fb7b89bc to your computer and use it in GitHub Desktop.
Get the reactive callbacks in the Android API
#!/usr/bin/python
import os
import sys
from bs4 import BeautifulSoup
from urllib2 import urlopen
import cgi
import csv
def fromurltoname(url):
    """Convert a reference-page URL path into a dotted name.

    For example ``/reference/android/app/Activity.html`` becomes
    ``android.app.Activity``.

    The ``/reference/`` prefix and the ``.html`` suffix are removed only
    when they are actually present, so an input lacking either one is not
    truncated by a fixed-length slice.
    """
    prefix = "/reference/"
    suffix = ".html"
    if url.startswith(prefix):
        url = url[len(prefix):]
    if url.endswith(suffix):
        url = url[:-len(suffix)]
    return url.replace("/", ".")
class EncodingException(Exception):
    """Raised when a fetched page cannot be decoded with its charset."""


def genparser(url):
    """Fetch *url* and return its content parsed by BeautifulSoup (lxml).

    The charset is taken from the ``Content-Type`` response header and
    falls back to UTF-8 when the header does not declare one.

    Raises:
        EncodingException: if the body cannot be decoded with that charset.
    """
    response = urlopen(url)
    try:
        header = response.headers.get('Content-Type', '')
        content = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    _, params = cgi.parse_header(header)
    encoding = params.get('charset', 'utf-8')
    try:
        html = content.decode(encoding)
    except (UnicodeError, LookupError):
        # LookupError covers a declared charset Python has no codec for.
        raise EncodingException("Error encoding url: " + url)
    return BeautifulSoup(html, "lxml")
def get_packages(page):
    """Extract the package links from the parsed packages page.

    *page* is the packages index as parsed by BeautifulSoup. The function
    looks up the table with class ``jd-sumtable`` and returns, in document
    order, the ``href`` of the link in the first ``<td>`` of each row.

    Rows without any ``<td>`` (for instance header rows) and cells without
    a link are skipped; an empty list is returned when the table is absent.
    """
    main_table = page.find("table", attrs={"class": "jd-sumtable"})
    if main_table is None:
        return []
    packages = []
    for tr in main_table.find_all("tr"):
        cells = tr.find_all("td")
        if not cells:
            # No data cell in this row, so there is nothing to index.
            continue
        link = cells[0].a
        if link is not None:
            packages.append(link["href"])
    return packages
def get_classes(package):
    """Return the class and interface URLs listed on a package page.

    *package* is a package summary page as parsed by BeautifulSoup. Inside
    the ``div`` with id ``doc-col``, every ``<h2>`` whose text is
    "interfaces" or "classes" (case-insensitive, surrounding whitespace
    ignored) is followed to its summary element, and the ``href`` of the
    link in the first ``<td>`` of each row is collected.

    When the ``doc-col`` div is missing, a warning is printed and an empty
    list is returned.
    """
    classes = []
    doc_col = package.find("div", id="doc-col")
    if doc_col is None:
        print("****Wrong package****")
        return classes
    for h2 in doc_col.find_all("h2"):
        title = h2.get_text().strip().lower()
        if title not in ("interfaces", "classes"):
            continue
        # The summary element is taken as the second sibling after the
        # heading; the original note says an <hr> precedes the <table>.
        skipped = h2.next_sibling
        container = skipped.next_sibling if skipped is not None else None
        if not hasattr(container, "find_all"):
            # Missing sibling or a bare text node: nothing to search.
            continue
        for tr in container.find_all("tr"):
            cells = tr.find_all("td")
            if not cells:
                continue
            # The cell must have a link.
            link = cells[0].a
            if link is not None:
                classes.append(link["href"])
    return classes
def get_callbacks(clazz):
    """Return the callback method names found on a parsed class page.

    *clazz* is a class reference page as parsed by BeautifulSoup. The
    tables with ids ``pubmethods``, ``promethods`` and ``primethods`` are
    scanned in that order; tables that are absent are ignored. In each row
    with at least two ``<td>`` cells, the text of the second cell up to the
    first "(" is taken as the method name. A name counts as a callback
    when it starts with "on" followed by an uppercase character.

    Names are returned in the order encountered; repeated names are kept.
    """
    callbacks = []

    def collect(table_id):
        table = clazz.find("table", id=table_id)
        if table is None:
            return
        for tr in table.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue
            signature = tds[1].get_text().strip()
            # partition() keeps the whole text when there is no "(";
            # slicing with find() would drop the last character instead.
            name = signature.partition("(")[0].strip()
            if name.startswith("on") and len(name) > 2 and name[2].isupper():
                callbacks.append(name)

    for table_id in ("pubmethods", "promethods", "primethods"):
        collect(table_id)
    return callbacks
def get_listeners_classes(main, start=0, maxi=-1):
    """Collect the callbacks declared by the classes of a range of packages.

    *main* is the URL of the packages index page. Packages are taken from
    position *start*; *maxi* caps how many are analyzed, with -1 meaning
    every package from *start* onward.

    Returns a dict mapping each class page URL to its list of callback
    names; classes without callbacks are left out.
    """
    all_packages = get_packages(genparser(main))
    # Slicing clamps to the list length by itself.
    stop = len(all_packages) if maxi == -1 else start + maxi
    selected = all_packages[start:stop]

    found = {}
    for package_url in selected:
        print("Analyzing package: " + package_url)
        if package_url != "/reference/android/package-summary.html":
            class_urls = get_classes(genparser(REF_PAGE + package_url))
            for class_url in class_urls:
                names = get_callbacks(genparser(REF_PAGE + class_url))
                if names:
                    found[class_url] = names
    return found
# Base URL of the documentation site and the path of its package index.
REF_PAGE = "http://developer.android.com"
PACKAGES_PAGE = "/reference/packages.html"
# Window of the package index mined by this run: MAXI packages from START.
START = 100
MAXI = 50

listeners = get_listeners_classes(REF_PAGE + PACKAGES_PAGE, START, MAXI)
print("Finished mining")
print("Printing results...")
# A run that starts at the first package begins a fresh file; any other
# run appends its rows to the existing one.
flag = "w" if START == 0 else "a"
# The with-block closes the file even if writing a row fails.
with open("callbacks.csv", flag) as f:
    w = csv.writer(f)
    for listener, callbacks in listeners.items():
        clazz = fromurltoname(listener)
        for callback in callbacks:
            # One row per (dotted class name, callback name) pair.
            w.writerow([clazz, callback])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment