Last active
August 29, 2015 14:02
-
-
Save anupamshakya7/60607c86a7dbe9853843 to your computer and use it in GitHub Desktop.
Merge two CSV into new with ID as a common in the headers part
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import os | |
if __name__ == '__main__': | |
"""This script will merge two CSV with common data as per the ID as a field | |
into new CSV. | |
::NOTE:: This program tested on 2.7.6 version of python. | |
For an example Test1.csv have this data: | |
---------------------------------------- | |
ID,name,add,school | |
1,anupam,FHG,sch1 | |
2,aditya,DFR,sch2 | |
3,urmila,HJTY,sch3 | |
4,roop,GHG | |
5,EASH,HJJ | |
Test2.csv have this data: | |
------------------------ | |
ID,Key,Value | |
1,x-jdkj,100 | |
2,k-djsh,200 | |
3,j-jdjd,300 | |
Resultant CSV have this data: | |
----------------------------- | |
add,school,ID,name,Value,Key | |
FHG,sch1,1,anupam,100,x-jdkj | |
HJTY,sch3,3,urmila,300,j-jdjd | |
DFR,sch2,2,aditya,200,k-djsh | |
How to run | |
---------- | |
On command promt go to the location where you placed your script(python script) | |
And issue this command 'python CSV.py' | |
After that it will ask for the absolute path where you have placed your two CSV with ID | |
as a common field(in header part). | |
::NOTE:: Please provide absolute path for folder like - C:\Users\hv3775\Desktop\TEST | |
That's. It will generate new CSV into the same folder that you have provided as a input path. | |
""" | |
input_folder_location = raw_input('Enter location where your required two csv files placed. NOTE:: Please enter absolute path: ') | |
infiles = [] | |
for csv_file in os.listdir(input_folder_location): | |
if csv_file.endswith('.csv'): | |
infiles.append(csv_file) | |
data = {} | |
fields = [] | |
for fname in infiles: | |
with open(fname, 'rb') as df: | |
reader = csv.DictReader(df) | |
for line in reader: | |
# assuming the field is called ID | |
if line['ID'] not in data: | |
data[line['ID']] = line | |
else: | |
for k,v in line.iteritems(): | |
if k not in data[line['ID']]: | |
data[line['ID']][k] = v | |
for k in line.iterkeys(): | |
if k not in fields: | |
fields.append(k) | |
del reader | |
data_list = [] | |
for d in data.items(): | |
if len(d[1].values()) != len(fields): | |
continue | |
data_list.append(d[1]) | |
csv_output_location = os.path.join(input_folder_location, 'Result.csv') | |
with open(csv_output_location, 'wb') as f: | |
w = csv.DictWriter(f, fields) | |
w.writeheader() | |
w.writerows(data_list) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment