anupamshakya7/Merge_CSV.py

## Merge_CSV.py
import csv
import os

if __name__ == '__main__':

    """This script will merge two CSV with common data as per the ID as a field
    into new CSV.

    ::NOTE:: This program tested on 2.7.6 version of python.

    For an example Test1.csv have this data:
    ----------------------------------------
    ID,name,add,school
    1,anupam,FHG,sch1
    2,aditya,DFR,sch2
    3,urmila,HJTY,sch3
    4,roop,GHG
    5,EASH,HJJ

    Test2.csv have this data:
    ------------------------
    ID,Key,Value
    1,x-jdkj,100
    2,k-djsh,200
    3,j-jdjd,300

    Resultant CSV have this data:
    -----------------------------
    add,school,ID,name,Value,Key
    FHG,sch1,1,anupam,100,x-jdkj
    HJTY,sch3,3,urmila,300,j-jdjd
    DFR,sch2,2,aditya,200,k-djsh


    How to run
    ----------
    On command promt go to the location where you placed your script(python script)
    And issue this command 'python CSV.py'

    After that it will ask for the absolute path where you have placed your two CSV with ID
    as a common field(in header part).

    ::NOTE:: Please provide absolute path for folder like - C:\Users\hv3775\Desktop\TEST

    That's. It will generate new CSV into the same folder that you have provided as a input path.
    """

    input_folder_location = raw_input('Enter location where your required two csv files placed. NOTE:: Please enter absolute path: ')
    infiles = []

    for csv_file in os.listdir(input_folder_location):
        if csv_file.endswith('.csv'):
            infiles.append(csv_file)

    data = {}
    fields = []

    for fname in infiles:
        with open(fname, 'rb') as df:
            reader = csv.DictReader(df)
            for line in reader:
                # assuming the field is called ID
                if line['ID'] not in data:
                    data[line['ID']] = line
                else:
                    for k,v in line.iteritems():
                        if k not in data[line['ID']]:
                            data[line['ID']][k] = v
                for k in line.iterkeys():
                    if k not in fields:
                        fields.append(k)
            del reader

    data_list = []

    for d in data.items():
        if len(d[1].values()) != len(fields):
            continue
        data_list.append(d[1])

    csv_output_location = os.path.join(input_folder_location, 'Result.csv')
    with open(csv_output_location, 'wb') as f:
        w = csv.DictWriter(f, fields)
        w.writeheader()
        w.writerows(data_list)
	import csv
	import os

	if __name__ == '__main__':

	"""This script will merge two CSV with common data as per the ID as a field
	into new CSV.

	::NOTE:: This program tested on 2.7.6 version of python.

	For an example Test1.csv have this data:
	----------------------------------------
	ID,name,add,school
	1,anupam,FHG,sch1
	2,aditya,DFR,sch2
	3,urmila,HJTY,sch3
	4,roop,GHG
	5,EASH,HJJ

	Test2.csv have this data:
	------------------------
	ID,Key,Value
	1,x-jdkj,100
	2,k-djsh,200
	3,j-jdjd,300

	Resultant CSV have this data:
	-----------------------------
	add,school,ID,name,Value,Key
	FHG,sch1,1,anupam,100,x-jdkj
	HJTY,sch3,3,urmila,300,j-jdjd
	DFR,sch2,2,aditya,200,k-djsh


	How to run
	----------
	On command promt go to the location where you placed your script(python script)
	And issue this command 'python CSV.py'

	After that it will ask for the absolute path where you have placed your two CSV with ID
	as a common field(in header part).

	::NOTE:: Please provide absolute path for folder like - C:\Users\hv3775\Desktop\TEST

	That's. It will generate new CSV into the same folder that you have provided as a input path.
	"""

	input_folder_location = raw_input('Enter location where your required two csv files placed. NOTE:: Please enter absolute path: ')
	infiles = []

	for csv_file in os.listdir(input_folder_location):
	if csv_file.endswith('.csv'):
	infiles.append(csv_file)

	data = {}
	fields = []

	for fname in infiles:
	with open(fname, 'rb') as df:
	reader = csv.DictReader(df)
	for line in reader:
	# assuming the field is called ID
	if line['ID'] not in data:
	data[line['ID']] = line
	else:
	for k,v in line.iteritems():
	if k not in data[line['ID']]:
	data[line['ID']][k] = v
	for k in line.iterkeys():
	if k not in fields:
	fields.append(k)
	del reader

	data_list = []

	for d in data.items():
	if len(d[1].values()) != len(fields):
	continue
	data_list.append(d[1])

	csv_output_location = os.path.join(input_folder_location, 'Result.csv')
	with open(csv_output_location, 'wb') as f:
	w = csv.DictWriter(f, fields)
	w.writeheader()
	w.writerows(data_list)