hyunjimoon/compare.py

## compare.py
# Convert oldtitle.csv to pickle and netCDF so the two on-disk formats can be
# compared. The sizes and timings noted below are the author's measurements.
import pandas as pd  # the code below uses the `pd` alias; a bare `import pandas` left it undefined
import scipy  # not referenced directly; presumably the netCDF backend used by xarray - TODO confirm

# 49MB
df = pd.read_csv("oldtitle.csv")

# takes 1s to create pkl, 51MB
df.to_pickle("oldtitle.pkl")

# takes 1.5s to create nc, 610MB
# to_xarray() relies on the optional xarray package; if this line does not
# work, make sure xarray is installed (and try an explicit `import xarray`).
df.to_xarray().to_netcdf("oldtitle.nc")

# Email amoon@mit.edu if you need oldtitle.csv

## oldtitle.csv
,title
0,Cristián Campos
1,Life & Death
2,Disguise for Murder
3,Anniversary
...
2545507,Death of a Revolutionary
2545508,"Tramp, Tramp, Tramp the Boys Are Marching"
2545510,The Second Part of Henry the Sixt
2545511,A Group of Terrorists Attacked...
2545512,The Siege of Kontum
2545513,Western Spaghetti
2545514,The Vanquished
2545515,The Final
2545516,The Awards
2545517,The Man Who Stole Uganda
2545518,V poiskakh kapitana Granta
2545519,Celebrity Special 2
2545520,Celebrity Special
2545521,The Final
2545522,The Legends
2545523,The Hunt for Doctor Mengele
2545524,Winterwatch
2545525,The Payoff
2545526,Banged Up
2545528,Celebrity Special 2
2545529,"Alas Poor Hippies, Love is Dead"
2545530,Tomorrow's Youth
	# Convert oldtitle.csv to pickle and netCDF so the two on-disk formats can be
	# compared. The sizes and timings noted below are the author's measurements.
	import pandas as pd  # the code below uses the `pd` alias; a bare `import pandas` left it undefined
	import scipy  # not referenced directly; presumably the netCDF backend used by xarray - TODO confirm

	# 49MB
	df = pd.read_csv("oldtitle.csv")

	# takes 1s to create pkl, 51MB
	df.to_pickle("oldtitle.pkl")

	# takes 1.5s to create nc, 610MB
	# to_xarray() relies on the optional xarray package; if this line does not
	# work, make sure xarray is installed (and try an explicit `import xarray`).
	df.to_xarray().to_netcdf("oldtitle.nc")

	# Email amoon@mit.edu if you need oldtitle.csv
	,title
	0,Cristián Campos
	1,Life & Death
	2,Disguise for Murder
	3,Anniversary
	...
	2545507,Death of a Revolutionary
	2545508,"Tramp, Tramp, Tramp the Boys Are Marching"
	2545510,The Second Part of Henry the Sixt
	2545511,A Group of Terrorists Attacked...
	2545512,The Siege of Kontum
	2545513,Western Spaghetti
	2545514,The Vanquished
	2545515,The Final
	2545516,The Awards
	2545517,The Man Who Stole Uganda
	2545518,V poiskakh kapitana Granta
	2545519,Celebrity Special 2
	2545520,Celebrity Special
	2545521,The Final
	2545522,The Legends
	2545523,The Hunt for Doctor Mengele
	2545524,Winterwatch
	2545525,The Payoff
	2545526,Banged Up
	2545528,Celebrity Special 2
	2545529,"Alas Poor Hippies, Love is Dead"
	2545530,Tomorrow's Youth