Skip to content

Instantly share code, notes, and snippets.

@lusentis
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lusentis/8ea04b66788d7a94b3ae to your computer and use it in GitHub Desktop.
Save lusentis/8ea04b66788d7a94b3ae to your computer and use it in GitHub Desktop.
# -.- coding: utf-8 -.-
import json
import urllib2
import predictionio
# Bulk-import video items into PredictionIO.
#
# Pages through a `_search` HTTP endpoint (`size` hits per request, starting
# at offset `from`), and for every hit whose `_source` carries both an `_id`
# and a non-empty `g` list, creates a PredictionIO item with `_id` as the item
# id and the cleaned `g` values as its second argument.
#
# NOTE: print(...) is used with a single argument throughout, which behaves
# identically whether print is the Python 2 statement or a function.
client = predictionio.Client(appkey="XXXXXXXXXXXXXXXXXXXXXXX")
total = 6028561  # hard-coded upper bound on the offset to page through
size = 1000      # number of hits requested per page
iter_ = 715      # zero-based page to start from; earlier pages are not fetched

while iter_ * size < total:
    from_ = iter_ * size
    response = urllib2.urlopen("XXXXXXXXXXXXX/_search?size=%d&from=%d" % (size, from_))
    try:
        data = json.load(response)
    finally:
        # Release the HTTP connection even when the body fails to parse.
        response.close()

    for hit in data['hits']['hits']:
        # A hit without `_source` is treated like a record with no fields.
        video = hit.get("_source") or {}
        item_id = video.get("_id")
        tags = video.get("g")

        # Validate BEFORE using the fields: joining a missing `_id` or `g`
        # raises TypeError. An incomplete record is skipped on its own rather
        # than discarding the remainder of the page.
        if not item_id or not tags:
            continue

        print(u" ".join(["import", item_id, "tags", u",".join(tags)]))

        # Drop non-ASCII characters, tabs and commas from each tag
        # (presumably because they clash with delimiters on the PredictionIO
        # side -- TODO confirm).
        cleaned_tags = [
            tag.encode("ascii", "ignore").replace("\t", "").replace(",", "")
            for tag in tags
        ]
        try:
            client.create_item(item_id, cleaned_tags)
        except predictionio.ItemNotCreatedError:
            # Best effort: report the failure and carry on with the next item.
            print("Error importing item")

    iter_ = iter_ + 1
    # Printed after the increment, so the number shown is the next page index.
    print("Batch %d completed" % iter_)
PredictionIO==0.7.0
argparse==1.2.1
wsgiref==0.1.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment