polynomialherder/load_csv.py

## load_csv.py
# Using SQLite's CSV mode from Python for extremely fast bulk data loads
#
# Note that if the target table does not exist, SQLite will create it automatically,
# creating column names from the first row in the CSV assuming TEXT datatypes.
#
# If the target table does exist, SQLite will treat the first row in the CSV file
# as *data*. So if the CSV file contains a header, you should either delete it from the
# source file, or, if using SQLite > 3.32.0, pass the `--skip 1` flag to the `.import`
# command (`import data.csv Data --skip 1`)
#
# This approach assumes that your CSV data is well-formed, although SQLite will robustly handle
# exotic character sets and certain kinds of CSV parse errors, emitting a warning and continuing
# if possible
#
# This approach tends to dramatically outperform common approaches such as reading the data using the
# built in csv module and calling `sqlite3.connection.executemany`, or using `pandas.Dataframe.to_sql`.

import subprocess

if __name__ == '__main__':
     with subprocess.Popen(
         ["sqlite3", "testdb"],
         universal_newlines=True,
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT
     ) as sqlite:
          sqlite.stdin.write(".mode csv\n")
          sqlite.stdin.write('.separator "\t"\n')
          sqlite.stdin.write(".import data.tsv Data\n")
          sqlite.stdin.write(".quit\n")
          sqlite.stdin.flush()
          # Read stdout output and read to CSV
          for stdout_line in iter(sqlite.stdout.readline, ""):
              print(stdout_line)
	# Using SQLite's CSV mode from Python for extremely fast bulk data loads
	#
	# Note that if the target table does not exist, SQLite will create it automatically,
	# creating column names from the first row in the CSV assuming TEXT datatypes.
	#
	# If the target table does exist, SQLite will treat the first row in the CSV file
	# as data. So if the CSV file contains a header, you should either delete it from the
	# source file, or, if using SQLite > 3.32.0, pass the `--skip 1` flag to the `.import`
	# command (`import data.csv Data --skip 1`)
	#
	# This approach assumes that your CSV data is well-formed, although SQLite will robustly handle
	# exotic character sets and certain kinds of CSV parse errors, emitting a warning and continuing
	# if possible
	#
	# This approach tends to dramatically outperform common approaches such as reading the data using the
	# built in csv module and calling `sqlite3.connection.executemany`, or using `pandas.Dataframe.to_sql`.

	import subprocess

	if __name__ == '__main__':
	with subprocess.Popen(
	["sqlite3", "testdb"],
	universal_newlines=True,
	stdin=subprocess.PIPE,
	stdout=subprocess.PIPE,
	stderr=subprocess.STDOUT
	) as sqlite:
	sqlite.stdin.write(".mode csv\n")
	sqlite.stdin.write('.separator "\t"\n')
	sqlite.stdin.write(".import data.tsv Data\n")
	sqlite.stdin.write(".quit\n")
	sqlite.stdin.flush()
	# Read stdout output and read to CSV
	for stdout_line in iter(sqlite.stdout.readline, ""):
	print(stdout_line)