Skip to content

Instantly share code, notes, and snippets.

@ChronoMonochrome
Last active October 20, 2016 17:56
Show Gist options
  • Save ChronoMonochrome/96517538b83ec1872b140c1717ac37aa to your computer and use it in GitHub Desktop.
Visual novels database parser
*.c
*.pyc
*.html
*.o
*.so
build
#!/usr/bin/env python
"""Build script: compiles vnparser.pyx into the `vnparser` extension."""
try:
    # Prefer setuptools when available...
    from setuptools import setup
    from setuptools import Extension
except ImportError:
    # ...and fall back to plain distutils otherwise.
    from distutils.core import setup
    from distutils.extension import Extension
from Cython.Distutils import build_ext
#import numpy as np

setup(
    name= 'Visual database parser class',
    # Route the build_ext command through Cython so .pyx sources compile.
    cmdclass = {'build_ext': build_ext},
    include_dirs = ['/usr/local/lib/python2.7/dist-packages/'],
    ext_modules = [Extension("vnparser", ["vnparser.pyx"])])
from vnparser import *

# Location of the saved HTML pages and parser tuning knobs.
PATH = "../html/v"
VN_MAX_ID = 40
BUFFER_SIZE = 10

# Args: (path, vn_max_id, buffer_size, threads, mysql_live_execution, output)
v = VNParser(PATH, VN_MAX_ID, BUFFER_SIZE, 4, False, PATH)
v.genDB()
#v.genDB_singlethreaded()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import collections, sys, os
import MySQLdb
from bs4 import BeautifulSoup
from pprint import pprint
from multiprocessing import Pool, Lock, cpu_count
from copy_reg import pickle
from types import MethodType
# Shared lock installed into each worker process by _MUTEX_init (the
# Pool initializer); serialises appends to the generated .sql files.
_MUTEX = None
# One-shot guard so the copy_reg pickle hook below is registered only once.
_PICKLE_METHOD_REGISTRED = False
def _pickle_method(method):
    """Reduce a bound method to (reconstructor, state) for pickling.

    multiprocessing must pickle bound methods to dispatch them to
    workers (Python 2 cannot do this natively), so we ship the method
    name together with its instance and class.
    """
    return _unpickle_method, (method.im_func.__name__,
                              method.im_self,
                              method.im_class)
def _unpickle_method(func_name, obj, cls):
for cls in cls.mro():
try:
func = cls.__dict__[func_name]
except KeyError:
pass
else:
break
return func.__get__(obj, cls)
# Register the bound-method pickle hook exactly once per interpreter:
# Pool.map_async pickles self._process_multiple_args for each worker.
if not _PICKLE_METHOD_REGISTRED:
    pickle(MethodType, _pickle_method, _unpickle_method)
    _PICKLE_METHOD_REGISTRED = True
def _MUTEX_init(mutex, out):
global _MUTEX
_MUTEX = mutex
try:
os.remove(out + "/novels.sql")
os.remove(out + "/novels_screenshots.sql")
except:
pass
class VNParser(object):
    """Parse saved VNDB visual-novel pages into SQL dump files.

    Pages are read from <path>/<id>.html.  genDB() splits the id range
    into buffer_size work units, fans them out over a multiprocessing
    Pool, and each worker appends the generated INSERT statements to
    <output>/novels.sql and <output>/novels_screenshots.sql (appends
    are serialised with the module-global _MUTEX).
    """

    def __init__(self, path, vn_max_id, buffer_size, threads_number = None, mysql_live_execution = False, output = None):
        """
        Args:
            path: directory containing the downloaded <id>.html pages.
            vn_max_id: highest visual-novel id to look for.
            buffer_size: pages per work unit handed to a worker.
            threads_number: pool size; defaults to cpu_count().
            mysql_live_execution: reserved flag for a live-MySQL write
                path (currently disabled everywhere in this module).
            output: directory for the generated .sql files; required
                unless mysql_live_execution is set.

        Raises:
            Exception: if neither mysql_live_execution nor output is given.
        """
        if not mysql_live_execution and not output:
            raise Exception("error, either mysql_live_execution or output must be specified")
        self.vn_max_id = vn_max_id
        self.buffer_size = buffer_size
        # Default to one worker per CPU when the caller does not care.
        self.threads_number = threads_number or cpu_count()
        self.mysql_live_execution = mysql_live_execution
        self.path = path
        self.output = output
        # NOTE(review): a commented-out block opening a live MySQL
        # connection (with hard-coded credentials) used to sit here; it
        # was dead code and has been removed.

    def __exit__(self):
        # No resources held.  NOTE(review): without a matching
        # __enter__ this object cannot actually be used in a `with`
        # statement — confirm whether the context protocol is intended.
        pass

    def getDictFromTable(self, table):
        """Convert the <tr> rows of a vndetails table to an OrderedDict.

        Two-cell rows become key -> cell text (or a list of hrefs when
        the value cell contains links); the description row (td.vndesc)
        becomes <h2 text> -> <p text>.
        """
        res = collections.OrderedDict()
        for row in table:
            tds = row.findAll("td")
            if len(tds) == 2:
                key = tds[0].text
                value = tds[1]
                all_hrefs = value.findAll("a")
                # Prefer link targets when the cell is a list of links.
                value = [a.get("href") for a in all_hrefs] if all_hrefs else value.text
            else:
                desc = row.find("td", attrs={'class': 'vndesc'})
                key = desc.find('h2').text
                value = desc.find('p').text
            res[key] = value
        return res

    def _process_multiple_args(self, args):
        # Pool.map adapter: unpacks one (start, end, writeout_args) tuple.
        start, end, writeout_args = args
        self.process(start, end, writeout_args)

    def process(self, start, end, writeout_args = None):
        """Parse pages start..end-1 and append their SQL to the outputs.

        Args:
            start, end: half-open id range to scan.
            writeout_args: [output_dir, unit_id]; when given, collected
                rows are appended to <output_dir>/novels.sql and
                <output_dir>/novels_screenshots.sql under _MUTEX.
        """
        novels = []
        novels_screenshots = []
        for v_id in xrange(start, end):
            try:
                # `with` so the handle is closed even on a read error.
                with open("%s/%d.html" % (self.path, v_id), "rb") as f:
                    html = f.read()
                print("open %d.html" % v_id)
            except (IOError, OSError):
                print("%d.html is not found, skipping..." % v_id)
                continue
            parsed_html = BeautifulSoup(html, "html.parser")
            vndetails = parsed_html.body.find('div', attrs={'class':'vndetails'})
            if not vndetails:
                print("page %d.html doesn't have vndetails div, skipping..." % v_id)
                continue
            try:
                screens = [img.get('src') for img in parsed_html.body.find('div', attrs={'class':'scr'}).findAll("img")]
            except AttributeError:
                print("screens weren't found on page %d.html" % v_id)
                # BUGFIX: this was None, which crashed the unconditional
                # screenshot loop below with a TypeError.
                screens = []
            try:
                img = vndetails.find("p").find("img")
            except AttributeError:
                print("title img was not found on page %d.html" % v_id)
                img = u'null'
            novel = self.getDictFromTable(vndetails.findAll("tr"))
            novel[u'v_id'] = v_id
            novel[u'image'] = img
            novels.append(novel)
            for screen_id, screen in enumerate(screens):
                novel_screenshot = collections.OrderedDict()
                novel_screenshot[u'v_id'] = unicode(v_id)
                novel_screenshot[u'screen_id'] = unicode(screen_id)
                novel_screenshot[u'screenshots'] = screen
                novels_screenshots.append(novel_screenshot)
        # NOTE(review): a commented-out buffered live-MySQL flush used
        # to live inside the loop; it referenced undefined names
        # (BUFFER_SIZE, self.db) and has been removed as dead code.
        if writeout_args:
            filepath, unit_id = writeout_args  # renamed: `id` shadowed the builtin

            def writeout(name, tables, tables_names_conversion):
                # Render the rows and append under the shared lock.
                s = self.mysqlQueries(name, tables, tables_names_conversion)
                with _MUTEX:
                    with open(filepath + "/" + name + ".sql", 'ab') as f:
                        f.write(s)

            novels_names_conversion = collections.OrderedDict()
            novels_names_conversion['v_id'] = u'v_id'
            novels_names_conversion['romaji'] = u'Title'
            novels_names_conversion['japanese'] = u'Original title'
            novels_names_conversion['duration'] = u'Length'
            novels_names_conversion['synopsis_en'] = u'Description'
            novels_names_conversion['posters'] = u'image'
            writeout("novels", novels, novels_names_conversion)

            novels_screenshots_names_conversion = collections.OrderedDict()
            novels_screenshots_names_conversion['v_id'] = u'v_id'
            novels_screenshots_names_conversion['screen_id'] = u'screen_id'
            novels_screenshots_names_conversion['screenshots'] = u'screenshots'
            writeout("novels_screenshots", novels_screenshots, novels_screenshots_names_conversion)

    def mysqlQueries(self, tables_name, tables, table_column_dict):
        """Render one INSERT INTO <tables_name> statement for *tables*.

        Args:
            tables_name: SQL table name.
            tables: list of per-row dicts as built by process().
            table_column_dict: OrderedDict mapping SQL column name ->
                key to look up in each row dict (missing keys -> 'null').

        Returns:
            UTF-8 encoded INSERT statement (bytes).

        NOTE(review): values are embedded by string formatting with only
        single quotes escaped — acceptable for trusted local HTML dumps,
        but this is NOT a general-purpose SQL escaper.
        """
        names, values = table_column_dict.keys(), table_column_dict.values()

        def prepare(table, keys):
            # One "('v1', ..., 'vN')," group for a single row.
            fmt = u"(%s),\n" % ("'%s',\n" * (len(keys) - 1) + "'%s'\n")
            vals = [unicode(table.get(k, u'null')).replace("'", "\\'") for k in keys]
            return fmt % tuple(vals)

        s = ("""INSERT INTO %s (%s) VALUES""" % (tables_name, "%s,\n" * (len(names) - 1) + '%s\n')) % (tuple(names))
        if not self.mysql_live_execution:
            for table in tables:
                s += prepare(table, values)
            s = s[:-2] + u";\n"  # drop the trailing ",\n"
            return s.encode("u8")
        # NOTE(review): the live-execution branch existed only as
        # commented-out cursor.execute code and has been removed; with
        # the flag set this method currently returns None.

    def genDB(self):
        """Generate the SQL dumps using a pool of worker processes."""
        if not self.mysql_live_execution:
            c = 0
            start, end = 0, self.buffer_size
            mutex = Lock()
            pool = Pool(processes = self.threads_number, initializer = _MUTEX_init, initargs = (mutex, self.output))
            while c <= self.vn_max_id:
                args_map = []
                for _ in range(self.threads_number):
                    if end > self.vn_max_id:
                        end = self.vn_max_id
                    args_map.append((start + 1, end + 1, [self.output, c]))
                    start += self.buffer_size
                    end += self.buffer_size
                c = end + 1
                # .get() with a huge timeout keeps the main process
                # interruptible (a bare .get() blocks KeyboardInterrupt).
                pool.map_async(self._process_multiple_args, args_map).get(9999999)
            pool.close()
            pool.join()

    def genDB_singlethreaded(self):
        """In-process variant of genDB(); kept for debugging only."""
        _MUTEX_init(Lock(), self.output)
        if not self.mysql_live_execution:
            c = 0
            start, end = 0, self.buffer_size
            while c <= self.vn_max_id:
                for _ in range(self.threads_number):
                    if end > self.vn_max_id:
                        end = self.vn_max_id
                    self._process_multiple_args([start + 1, end + 1, [self.output, c]])
                    start += self.buffer_size
                    end += self.buffer_size
                c = end + 1
if __name__ == '__main__':
    # Parser configuration: page directory, highest id to scan, and the
    # number of pages per work unit.
    PATH = "html/v"
    VN_MAX_ID = 20137
    BUFFER_SIZE = 250
    # VNParser(path, vn_max_id, buffer_size, threads_number = None, mysql_live_execution = False, output = PATH)
    vnparser = VNParser(PATH, VN_MAX_ID, BUFFER_SIZE, 4, False, PATH)
    vnparser.genDB()
    #vnparser.genDB_singlethreaded()
# -*- coding: utf-8 -*-
import collections, sys, os
import MySQLdb
from bs4 import BeautifulSoup
from pprint import pprint
from multiprocessing import Pool, Lock, cpu_count
from copy_reg import pickle
from types import MethodType
# One-shot flag: the copy_reg hook below must be installed only once.
_PICKLE = False

def _pickle_method(method):
    # Reduce a bound method to (reconstructor, state) so multiprocessing
    # can pickle it (Python 2 cannot pickle bound methods natively).
    func_name = method.im_func.__name__
    obj = method.im_self
    cls = method.im_class
    return _unpickle_method, (func_name, obj, cls)
def _unpickle_method(func_name, obj, cls):
break_ = 0
for cls in cls.mro():
if break_: break
try:
func = cls.__dict__[func_name]
except KeyError:
pass
else:
break_ = 1
return func.__get__(obj, cls)
# Register the bound-method pickle hook exactly once per interpreter:
# Pool.map_async pickles self._process_multiple_args for each worker.
if not _PICKLE:
    pickle(MethodType, _pickle_method, _unpickle_method)
    _PICKLE = True
cdef class VNParser(object):
    # Cython port of the pure-Python VNParser: parses saved VNDB pages
    # into SQL INSERT dumps, or (with mysql_live_execution) writes rows
    # straight to a live MySQL connection.
    # Extension-type field declarations:
    cdef public bytes path                 # directory with <id>.html pages
    cdef public bytes output               # target .sql file path
    cdef public int vn_max_id              # highest id to scan
    cdef public int buffer_size            # pages per work unit
    cdef public int threads_number         # pool size
    cdef public int mysql_live_execution   # int-typed boolean flag
    cdef public object _cursor             # MySQL cursor (live mode only)
    cdef public object _db                 # MySQL connection (live mode only)
    cdef public object _mutex              # lock guarding output appends

    def __init__(self, output, vn_max_id, buffer_size, threads_number = None, mysql_live_execution = False, path = None):
        # NOTE: parameter order differs from the .py version — here the
        # OUTPUT comes first and path last.
        if not mysql_live_execution and not path:
            raise Exception("error, either mysql_live_execution or path must be specified")
        self.vn_max_id = vn_max_id
        self.buffer_size = buffer_size
        if not threads_number:
            # Default to one worker per CPU.
            self.threads_number = cpu_count()
        else:
            self.threads_number = threads_number
        self.mysql_live_execution = mysql_live_execution
        self.path = path
        self.output = output
        if mysql_live_execution:
            # Open the live connection up front and force UTF-8 on it.
            db = MySQLdb.connect(host='myanimespace.com', user='******', passwd='*******', db='myanimespace')
            db.set_character_set('utf8')
            cursor = db.cursor()
            cursor.execute('SET NAMES utf8;')
            cursor.execute('SET CHARACTER SET utf8;')
            cursor.execute('SET character_set_connection=utf8;')
            self._db = db
            self._cursor = cursor

    cdef inline getDictFromTable(self, table):
        # Convert the <tr> rows of a vndetails table to an OrderedDict:
        # two-cell rows become key -> text (or list of hrefs when the
        # value cell contains links); the td.vndesc row becomes
        # <h2 text> -> <p text>.
        res = collections.OrderedDict()
        for row in table:
            tds = row.findAll("td")
            if len(tds) == 2:
                key = tds[0].text
                value = tds[1]
                all_hrefs = value.findAll("a")
                if not all_hrefs:
                    value = value.text
                else:
                    value = [a.get("href") for a in all_hrefs]
            else:
                desc = row.find("td", attrs={'class': 'vndesc'})
                key = desc.find('h2').text
                value = desc.find('p').text
            res[key] = value
        return res

    def _process_multiple_args(self, args):
        # Pool.map adapter: unpacks one (start, end, writeout_args) tuple.
        start, end, writeout_args = args
        self.process(start, end, writeout_args)

    cdef inline process(self, start, end, writeout_args = None):
        # Parse pages start..end-1 into `tables` (id -> row dict) and
        # either flush them live to MySQL in BUFFER_SIZE batches or
        # append the rendered SQL to the output file.
        tables = collections.OrderedDict()
        v_count = 0
        for v_id in xrange(start, end):
            try:
                html = open("%s/%d.html" % (self.path, v_id), "rb").read()
                print("open %d.html" % v_id)
            except:
                print("%d.html is not found, skipping..." % v_id)
                continue
            parsed_html = BeautifulSoup(html, "html.parser")
            vndetails = parsed_html.body.find('div', attrs={'class':'vndetails'})
            if not vndetails:
                print("page %d.html doesn't have vndetails div, skipping..." % v_id)
                continue
            try:
                screens = [img.get('src') for img in parsed_html.body.find('div', attrs={'class':'scr'}).findAll("img")]
            except:
                print("screens weren't found on page %d.html" % v_id)
                screens = u'null'
            try:
                img = vndetails.find("p").find("img")
            except:
                #catch()
                print("title img was not found on page %d.html" % v_id)
                img = u'null'
            table = self.getDictFromTable(vndetails.findAll("tr"))
            table[u'image'] = img
            table[u'screens'] = screens
            tables[v_id] = table
            if self.mysql_live_execution:
                v_count += 1
                if v_count > BUFFER_SIZE:
                    # NOTE(review): BUFFER_SIZE is the module-level
                    # constant from the __main__ block, not
                    # self.buffer_size — looks unintentional; confirm.
                    print("cur v_id %d" % (v_id))
                    self.mysqlQueries(tables)
                    tables.clear()
                    v_count = 0
                    # NOTE(review): the attribute is self._db; self.db
                    # would raise AttributeError here — confirm.
                    self.db.commit()
                # flush any leftover buffered rows
                # NOTE(review): v_id never equals `end` inside
                # xrange(start, end), so this branch looks unreachable.
                if (v_id == end) and len(tables):
                    q = self.mysqlQueries(tables)
                    #print("MySQL queries:");pprint(q)
        if writeout_args:
            filename, id = writeout_args
            # Render and append under the shared lock.
            with self._mutex:
                s = self.mysqlQueries(tables)
                f = open(filename, 'ab')
                f.write(s)
                f.close()
            #pprint(s)

    cdef inline mysqlQueries(self, tables):
        # Render (dump mode) or execute (live mode) the INSERT for the
        # `novels` table.  NOTE(review): values are embedded by string
        # formatting with only single quotes escaped — not a
        # general-purpose SQL escaper.
        s = """
        INSERT INTO
            novels
            (
                id,
                romaji,
                japanese,
                duration,
                synopsis_en,
                posters
            )
            VALUES
        """
        if not self.mysql_live_execution:
            def prepare(v_data, args):
                # One "('v1', ..., 'v6')," group; missing keys -> 'null'.
                # NOTE(review): the first lookup key is unicode(v_id),
                # which is not a key of the row dict, so the id column
                # always renders as 'null' — looks like a bug; confirm.
                s = u"""
                (
                    '%s',
                    '%s',
                    '%s',
                    '%s',
                    '%s',
                    '%s',
                ),"""
                args = [unicode(v_data.get(i, u'null')).replace("'", "\\'") for i in args]
                return s % tuple(args)
            for v_id in tables:
                v_data = tables[v_id]
                s += prepare(v_data,
                    [unicode(v_id), u'Title', u'Original title', u'Length', u'Description', u'image'])
            s = s[:-1] + u";\n"  # drop the trailing comma
            return s.encode("u8")
        else:
            for v_id in tables:
                t = tables[v_id]
                placeholders = (str(v_id), t.get(u'Title', 'null'), t.get(u'Original title', 'null'), t.get(u'Length', 'null'), t.get(u'Description', 'null'), t.get(u'image', 'null'))
                # to pinpoint a failing SQL statement:
                #f = open('mysql.txt', 'w')
                #f.write(s)
                #f.write(', '.join(placeholders))
                #f.close()
                # NOTE(review): the attribute is self._cursor; self.cursor
                # would raise AttributeError here — confirm.
                self.cursor.execute(s, placeholders)

    cdef _MUTEX_init(self, mutex, out):
        # Worker initializer: install the shared lock and delete a stale
        # output file (missing file is silently ignored).
        self._mutex = mutex
        try:
            os.remove(out)
        except:
            pass

    def genDB(self):
        # Generate the SQL dump with a pool of workers, or (live mode)
        # stream the range straight into MySQL.
        if not self.mysql_live_execution:
            c = 0
            start, end = 0, self.buffer_size
            mutex = Lock()
            # NOTE(review): initializer is a bound (cdef!) method and
            # `self` is passed again in initargs — the worker would call
            # it with an extra argument, and cdef methods are not
            # Python-visible; this path looks broken — confirm against
            # the module-level _MUTEX_init used in the .py version.
            pool = Pool(processes = self.threads_number, initializer = self._MUTEX_init, initargs = (self, mutex, self.output))
            while c <= self.vn_max_id:
                args_map = []
                for i in range(self.threads_number):
                    if end > self.vn_max_id:
                        end = self.vn_max_id
                    args_map.append((start + 1, end + 1, [self.output, c]))
                    start += self.buffer_size
                    end += self.buffer_size
                c = end + 1
                # Huge .get() timeout keeps the main process interruptible.
                pool.map_async(self._process_multiple_args, args_map).get(9999999)
            pool.close()
            pool.join()
        else:
            # NOTE(review): 3184 is a hard-coded resume point; presumably
            # where a previous live run stopped — confirm.
            self.process(3184, self.vn_max_id);
            # NOTE(review): attribute is self._db, not self.db — confirm.
            self.db.commit()
            self.db.close()
if __name__ == '__main__':
    # Parser configuration: page directory, highest id to scan, and the
    # number of pages per work unit.
    PATH = "html/v"
    VN_MAX_ID = 20137
    BUFFER_SIZE = 250
    # VNParser(output, vn_max_id, buffer_size, threads_number = None, mysql_live_execution = False, path = PATH)
    vnparser = VNParser(PATH + "/sql_new.txt", VN_MAX_ID, BUFFER_SIZE, 4, False, PATH)
    vnparser.genDB()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment