Skip to content

Instantly share code, notes, and snippets.

@udonmai
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save udonmai/17564e6fc96ae77ac5e8 to your computer and use it in GitHub Desktop.
Save udonmai/17564e6fc96ae77ac5e8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import json
import collections
from pprint import *
from peewee import *
# DB init: peewee handle for the MySQL database named 'hitode', accessed as
# user 'root'. No password or host is passed, so the driver's defaults apply.
# Constructing the handle does not connect; mysql_db.connect() is called
# explicitly in the script body below.
mysql_db = MySQLDatabase('hitode', user='root')
class BaseModel(Model):
    """Shared parent for the models in this script.

    Its only job is to point the peewee ``Meta.database`` option at the
    module-level ``mysql_db`` handle so subclasses do not repeat it.
    """

    class Meta:
        # Database handle used by this model and the models derived from it.
        database = mysql_db
class Book(BaseModel):
    """A single book row, identified by its ISBN."""

    # ISBN string; it is the primary key and is additionally flagged unique.
    isbn = CharField(unique=True, primary_key=True)

    # Title is mandatory; the author may be missing.
    book_name = TextField(null=False)
    author_name = TextField(null=True)

    # Optional numeric category and free-text review. The import script in
    # this file fills them with 0 and '' respectively.
    category = IntegerField(null=True)
    review = TextField(null=True)

    # Mandatory link to the book page and to its cover image.
    # NOTE(review): both are CharField without an explicit max_length, so the
    # library's default column length applies -- confirm it is long enough
    # for the URLs in the data set.
    url = CharField(null=False)
    image_url = CharField(null=False)
def convert(data):
    """Recursively convert text inside *data* to the interpreter's native ``str``.

    Intended for making decoded JSON readable when pretty-printed.

    * Python 2: ``unicode`` values are encoded to UTF-8 byte strings.
    * Python 3: ``str`` values are already native and are returned unchanged.
    * Byte strings are always returned unchanged.
    * Mappings are rebuilt as a plain ``dict`` with converted keys and values.
    * Any other iterable is rebuilt with ``type(data)(...)``, so lists, tuples
      and sets keep their type.
    * Everything else (numbers, ``None``, ...) is returned as is.
    """
    # Function-scope imports keep this helper self-contained on both Python
    # lines: the container ABCs moved to collections.abc in Python 3.
    import sys
    try:
        from collections import abc as abc_types
    except ImportError:  # Python 2
        import collections as abc_types

    # Byte strings (plain ``str`` on Python 2) are already encoded. Calling
    # .encode() on them would first decode implicitly as ASCII and raise
    # UnicodeDecodeError for any non-ASCII byte, so pass them through.
    if isinstance(data, bytes):
        return data

    if sys.version_info[0] < 3:
        if isinstance(data, unicode):  # noqa: F821 -- Python 2 builtin
            return data.encode('utf-8')
    elif isinstance(data, str):
        # Must be tested before the Iterable branch: str is iterable too.
        return data

    if isinstance(data, abc_types.Mapping):
        return dict(
            (convert(key), convert(value)) for key, value in data.items()
        )
    if isinstance(data, abc_types.Iterable):
        return type(data)(map(convert, data))
    return data
# File holding one already-imported ISBN per line (read at start, rewritten
# after a successful import).
ISBN_FILE = 'uisbn.txt'
# JSON dump to import: {group_key: {book_key: book_record}}.
DATA_FILE = 'data_100000.json'
# Number of rows sent to the database per INSERT statement.
BATCH_SIZE = 1000


def load_known_isbns(path):
    """Return the ISBNs listed in *path*, one per line, in file order.

    A missing file is treated as "nothing imported yet" so the very first run
    does not crash. Blank lines and surrounding whitespace (including a stray
    carriage return) are dropped so they never become bogus ISBN entries.
    """
    if not os.path.isfile(path):
        return []
    with open(path, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [isbn for isbn in stripped if isbn]


def collect_new_books(books, known_isbns):
    """Build insert rows for every book whose ISBN has not been seen yet.

    *books* is the decoded JSON document: a mapping of mappings whose leaf
    values are book records with the keys ``isbn`` (a mapping that may hold
    ``'13'`` and/or ``'10'``), ``book_name``, ``author_name``, ``url`` and
    ``imageurl``.

    *known_isbns* is the list of ISBNs already imported. It is extended in
    place with each newly accepted ISBN so the caller can persist it.

    The ISBN-13 is preferred; the ISBN-10 is used only when no ISBN-13 key is
    present. Records with neither are skipped, as are records whose chosen
    ISBN is already known (no fallback to the other ISBN is attempted).

    Returns a list of dicts keyed by the ``Book`` model's field names.
    """
    # A set gives O(1) membership tests; the list is kept alongside it only
    # to preserve the on-disk ordering of the ISBN file.
    seen = set(known_isbns)
    rows = []
    for group in books.values():
        for book in group.values():
            isbns = book['isbn']
            if '13' in isbns:
                isbn = isbns['13']
            elif '10' in isbns:
                isbn = isbns['10']
            else:
                continue
            if isbn in seen:
                continue
            seen.add(isbn)
            known_isbns.append(isbn)
            rows.append({
                'isbn': isbn,
                'book_name': book['book_name'],
                'author_name': book['author_name'],
                'category': 0,
                'review': '',
                'url': book['url'],
                'image_url': book['imageurl'],
            })
    return rows


def main():
    """Import the not-yet-seen books from DATA_FILE into the ``Book`` table."""
    known_isbns = load_known_isbns(ISBN_FILE)
    pprint(known_isbns)

    with open(DATA_FILE, "r") as handle:
        books = json.load(handle)

    rows = collect_new_books(books, known_isbns)
    pprint(rows)

    # The table must already exist; run Book.create_table() once beforehand.
    mysql_db.connect()
    try:
        # One transaction around all batches: either every new book is
        # inserted or none is.
        with mysql_db.transaction():
            for start in range(0, len(rows), BATCH_SIZE):
                Book.insert_many(rows[start:start + BATCH_SIZE]).execute()
    finally:
        mysql_db.close()

    # Persist the ISBN list only after the insert has committed. Writing it
    # earlier would mark books as imported even if the transaction failed.
    with open(ISBN_FILE, "w") as handle:
        handle.write('\n'.join(known_isbns) + '\n')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment