Skip to content

Instantly share code, notes, and snippets.

@bitnulleins
Last active March 15, 2022 14:25
Show Gist options
  • Save bitnulleins/5de5aa3adde3670430d94c49e1a9648e to your computer and use it in GitHub Desktop.
Save bitnulleins/5de5aa3adde3670430d94c49e1a9648e to your computer and use it in GitHub Desktop.
CSV to Parquet Converter (Python)
#!/usr/bin/python
import pandas as pd
import sys, getopt
def main(argv):
    """
    Convert a CSV file to a Parquet file with the pandas library.

    The output path is derived from the input path: a trailing ``.csv``
    extension (any letter case) is replaced by ``.parquet``; for any other
    file name ``.parquet`` is appended. The output path therefore always
    differs from the input path, so the source file is never overwritten.

    Requirements:
    * pandas
    * a Parquet engine usable by pandas (pyarrow or fastparquet)

    INSTALL REQUIREMENTS
    ====================
    pip install pandas pyarrow

    USAGE
    =====
    python csv2parquet.py -i %SOURCEFILE%

    Args:
        argv: Command line arguments without the program name.

    Exit status (via ``sys.exit``):
        0 after printing the usage text for ``-h``;
        2 for unknown options, a missing ``-i`` option, an unparsable CSV
        file or any other failure while reading or writing.
    """
    usage = 'csv2parquet.py -i <inputfile>'
    inputfile = ''

    # Only the option parsing can raise GetoptError, so keep this try minimal.
    try:
        opts, _positional = getopt.getopt(argv, "hi:", ["ifile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg

    # Detect the missing option explicitly instead of letting read_csv('')
    # fail and guessing at the cause afterwards.
    if not inputfile:
        print('Error: Did you miss the -i option?')
        sys.exit(2)

    # Swap only a trailing ".csv"; str.replace() would also rewrite ".csv"
    # inside directory names and would return the input path unchanged for
    # names such as "data.CSV", making to_parquet() clobber the source file.
    if inputfile.lower().endswith('.csv'):
        outputfile = inputfile[:-len('.csv')] + '.parquet'
    else:
        outputfile = inputfile + '.parquet'

    try:
        csv_data = pd.read_csv(inputfile)
        csv_data.to_parquet(outputfile)
    except pd.errors.ParserError:
        print('Inputfile is not valid csv.')
        sys.exit(2)
    except Exception as err:
        # Top-level CLI boundary: report the real cause (missing file,
        # missing Parquet engine, permissions, ...) and fail with status 2.
        print('Error: %s' % err)
        sys.exit(2)

    print("Sucessfuly convert %s to %s" % (inputfile, outputfile))
# Script entry point: hand the command line arguments (program name
# stripped) to main() when the file is executed directly.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment