Skip to content

Instantly share code, notes, and snippets.

@matabares
Created October 3, 2019 19:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matabares/63606b59737f9bc13161a5223a9eccd7 to your computer and use it in GitHub Desktop.
Save matabares/63606b59737f9bc13161a5223a9eccd7 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from tabulate import tabulate
# Unused alternative, kept for reference: read a very large CSV in chunks.
#chunksize = 10 ** 6
#for chunk in pd.read_csv('', chunksize=chunksize):
import os
# Merge every pipe-delimited amenity file found under `source` into one CSV
# that keeps only the first row seen for each `amenitycode`.
source = 'c:/borreme/isa2/'
# The result lives inside the directory being walked, so it must be excluded
# from the inputs; otherwise a second run would parse the previous
# (comma-separated) output as if it were a pipe-delimited input file.
result_path = os.path.join(source, 'result.csv')
_result_key = os.path.normcase(os.path.abspath(result_path))

allFrames = []
for root, _dirs, filenames in os.walk(source):
    for filename in filenames:
        # os.walk descends into subdirectories, so the path has to be built
        # from `root`; prefixing the top-level directory breaks for nested
        # files.
        path = os.path.join(root, filename)
        if os.path.normcase(os.path.abspath(path)) == _result_key:
            continue
        print(filename)
        df = pd.read_csv(
            path,
            delimiter='|',
            names=['hotelcode', 'amenitycode', 'name', 'type'],
            header=None,
        )
        # De-duplicate per file first so less data is held until the concat.
        df.drop_duplicates(subset='amenitycode', keep='first', inplace=True)
        allFrames.append(df)

print(len(allFrames))
if allFrames:
    totFrame = pd.concat(allFrames)
    # The same amenity code can appear in several files; keep the first one.
    totFrame.drop_duplicates(subset='amenitycode', keep='first', inplace=True)
    totFrame.to_csv(result_path)
else:
    # pd.concat raises ValueError on an empty list; report the situation
    # clearly instead and write nothing.
    print('No input files found under ' + source + '; nothing written.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment