|
import concurrent.futures
import json
import shelve
import sys
import threading
import traceback
from urllib.parse import quote_plus, urlencode, urljoin

import requests

import config
|
|
|
|
|
# import asyncio |
|
|
|
def all_ecosystems(ecosystems):
    """Fetch the pending-package list for each ecosystem and sync every entry.

    For each ecosystem name, GETs ``pending/<ecosystem>`` from the service
    and invokes do_sync() on every package in the response's "data" list.
    Raises requests.HTTPError if a pending-list request fails.
    """
    for eco in ecosystems:
        pending_url = urljoin(base_url, "pending/" + eco)
        print("Invoking url: %s" % pending_url)

        resp = requests.get(pending_url, headers=headers)
        resp.raise_for_status()

        pending = resp.json()["data"]
        print("Pending %s packages = %s" % (eco, len(pending)))
        if not pending:
            continue

        # invoke graph_sync for every pending package in this ecosystem
        for pkg in pending:
            do_sync(pkg)
|
|
|
|
|
# Common headers for every request to the sync service; the auth token
# and base URL come from the local config module.
headers = {'Accept': 'application/json', 'auth-token': config.AUTH_TOKEN}

base_url = config.BASE_URL

# Serializes shelve writes performed by the worker threads below.
db_lock = threading.Lock()

# Persistent record of already-synced packages, keyed by
# "<ecosystem>/<name>" (package granularity) or
# "<ecosystem>/<name>/<version>" (version granularity).
db = shelve.open("package_shelve.db")
|
|
|
|
|
def my_url_encode(s):
    """Percent-encode *s* for use as a single URL path segment.

    Equivalent to the previous ``urlencode({"": s})[1:]`` trick —
    urlencode() stringifies the value and runs it through quote_plus
    (spaces become '+') — but without the dummy-key indirection.
    """
    return quote_plus(str(s))
|
|
|
|
|
def do_sync_version(package):
    """Trigger a sync for one (ecosystem, name, version) triple.

    Example:
        package = {'ecosystem': 'maven', 'name': 'junit:junit', 'version': '3.8'}

    On success the package is recorded in the shelve db under
    "<ecosystem>/<name>/<version>" so later runs skip it; on an HTTP
    error the traceback is printed and nothing is recorded.
    """
    eco, name, version = (
        my_url_encode(package[k]) for k in ("ecosystem", "name", "version")
    )

    url = urljoin(base_url, "sync_all/%s/%s/%s" % (eco, name, version))
    print("Invoking url: %s" % url)

    resp = requests.get(url, headers=headers)
    try:
        resp.raise_for_status()
    except Exception:
        traceback.print_exc()
    else:
        key = "%s/%s/%s" % (package["ecosystem"], package["name"], package["version"])
        print("COMPLETED: %s" % key)
        with db_lock:
            db[key] = 1
            db.sync()
|
|
|
|
|
def do_sync(package):
    """Trigger a package-level sync for one (ecosystem, name) pair.

    Example:
        package = {'ecosystem': 'maven', 'name': 'junit:junit', 'version': '3.8'}

    On success the package is recorded in the shelve db under
    "<ecosystem>/<name>" so later runs skip it; on an HTTP error the
    traceback is printed and nothing is recorded.
    """
    ecosystem = my_url_encode(package["ecosystem"])
    pname = my_url_encode(package["name"])

    # Build the URL with urljoin for consistency with do_sync_version()
    # and all_ecosystems(); plain "+" concatenation silently produced a
    # malformed URL whenever base_url lacked a trailing slash.
    url = urljoin(base_url, "sync_all/%s/%s" % (ecosystem, pname))

    print("Invoking url: %s" % url)
    response = requests.get(url, headers=headers)
    try:
        response.raise_for_status()
    except Exception:
        traceback.print_exc()
    else:
        key = "%s/%s" % (package["ecosystem"], package["name"])
        print("COMPLETED: %s" % key)
        with db_lock:
            db[key] = 1
            db.sync()
|
|
|
|
|
# --- command-line arguments ---------------------------------------------
# argv[1]: granularity, "package" (default) or "version"
granularity = "package"
if len(sys.argv) >= 2:
    granularity = sys.argv[1]

# argv[2]: JSON input file holding the package list
input_file = "maven-data-new.json"
if len(sys.argv) >= 3:
    input_file = sys.argv[2]

# argv[3:]: ecosystems to process; defaults to the full supported set
ecosystems = {"nuget", "maven", "npm", "pypi", "go"}
if len(sys.argv) >= 4:
    ecosystems = set(sys.argv[3:])

# Load the package list. The file handle was previously never closed;
# a context manager closes it deterministically.
with open(input_file, "r") as fp:
    data = json.load(fp)
packages = data["data"]
|
|
|
|
|
# We can use a with statement to ensure threads are cleaned up promptly |
|
# We can use a with statement to ensure threads are cleaned up promptly
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    # Map each submitted future back to its package so the completion
    # loop can report the right one.
    future_to_package_map = {}

    # Keys submitted during this run: names, or (name, version) tuples.
    invoked_list = set()

    if granularity == "version":
        for package in packages:
            e, p, v = (package["ecosystem"], package["name"], package["version"])
            key = "%s/%s/%s" % (e, p, v)
            # Skip ecosystems we were not asked for, and packages the
            # shelve db already records as synced.
            if e in ecosystems and key not in db:
                f = executor.submit(do_sync_version, package)
                invoked_list.add((p, v))
                future_to_package_map[f] = package

    elif granularity == "package":
        for package in packages:
            e, p, v = (package["ecosystem"], package["name"], package["version"])
            key = "%s/%s" % (e, p)
            if e in ecosystems and key not in db:
                f = executor.submit(do_sync, package)
                invoked_list.add(p)
                future_to_package_map[f] = package

    for future in concurrent.futures.as_completed(future_to_package_map):
        pkg = future_to_package_map[future]
        try:
            data = future.result()
            # do json processing here
        except Exception as exc:
            # BUG FIX: previously printed `package` — the leftover loop
            # variable (always the last submitted package) — instead of
            # the package that actually failed.
            print("FAILURE: %s" % pkg)
            print('%r generated an exception: %s' % (pkg, exc))
        else:
            # Same fix on the success path: report the completed package.
            print("SUCCESS: %s" % pkg)
# NOTE(review): stray trailing text — "Following error is encountered
# multiple times:" — preserved as a comment so the module parses; the
# error text it referred to is not present in this file.