kamipatel/do.py

## do.py
'''
Author: Kam & Iyer
Setup: To run this code
0 - Copy the previous quarter's CSV as old.csv and the current quarter's CSV as new.csv
1 - Install Python 3
2 - Run command "pip install pandas"
3 - Run command "python3 do.py"
4 - Results should be in out.csv (check the last column, "what", to see why each row was included)
'''

import pandas as pd

KEY_COLUMN = 'Company Name'


def load_csv(path):
    """Read the CSV at *path* and strip whitespace around its column names.

    Args:
        path: Location of the CSV file, handed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, with leading and trailing whitespace removed
        from every column label.
    """
    frame = pd.read_csv(path)
    frame.columns = frame.columns.str.strip()
    return frame


def find_new_yes(old, new, key=KEY_COLUMN):
    """Return the merged rows in which at least one column newly became "Yes".

    The two frames are merged on *key*. For every other column of *old* that
    also exists in *new*, a row qualifies when the old value is anything
    other than "Yes" and the new value is exactly "Yes".

    Prints the number of merged rows, one "Removing <column>" line for each
    column of *old* that cannot be compared, and the *key* value of every
    qualifying row.

    Args:
        old: DataFrame for the previous period.
        new: DataFrame for the current period.
        key: Name of the column to merge on.

    Returns:
        A DataFrame holding the qualifying merged rows (original merged index
        labels preserved) plus a trailing ``what`` column that lists, per
        row, the names of the columns that changed. When nothing qualifies
        the frame is empty but still has all of its columns.
    """
    merged = pd.merge(left=old, right=new, left_on=key, right_on=key)
    print(len(merged))

    # Only columns that exist on both sides come out of the merge with the
    # "_x" (old) and "_y" (new) suffixes; anything else has no counterpart
    # to compare against and is skipped.
    tracked = []
    for name in old.columns:
        if name == key:
            continue
        if name + "_x" in merged and name + "_y" in merged:
            tracked.append(name)
        else:
            print("Removing", name)

    rows = []
    reasons = []
    for _, row in merged.iterrows():
        changed = [
            name
            for name in tracked
            if row[name + "_x"] != "Yes" and row[name + "_y"] == "Yes"
        ]
        if changed:
            print(row[key])
            rows.append(row)
            # Keep the reasons beside the row rather than writing into it:
            # the Series yielded by iterrows() should not be modified.
            reasons.append(changed)

    # Passing the columns explicitly keeps the schema when no row matched, so
    # later column lookups (e.g. drop_duplicates on the key) cannot fail.
    result = pd.DataFrame(rows, columns=merged.columns)
    result['what'] = pd.Series(reasons, index=result.index, dtype=object)
    return result


def main():
    """Compare old.csv with new.csv and write out-dup.csv and out.csv.

    out-dup.csv receives every qualifying merged row; out.csv receives the
    same rows with only the first occurrence of each company name kept.
    """
    old = load_csv('old.csv')
    new = load_csv('new.csv')

    flagged = find_new_yes(old, new)
    flagged.to_csv('out-dup.csv')
    flagged.drop_duplicates(KEY_COLUMN).to_csv('out.csv')

    print("done!")


if __name__ == "__main__":
    main()
	'''
	Author: Kam & Iyer
	Setup: To run this code
	0 - Copy the previous quarter's CSV as old.csv and the current quarter's CSV as new.csv
	1 - Install Python 3
	2 - Run command "pip install pandas"
	3 - Run command "python3 do.py"
	4 - Results should be in out.csv (check the last column, "what", to see why each row was included)
	'''

	import pandas as pd

	# Load the previous-quarter export and strip stray whitespace around the
	# header names.
	df1 = pd.read_csv('old.csv')
	df1.columns = df1.columns.str.strip()

	# Same treatment for the current-quarter export.
	df2 = pd.read_csv('new.csv')
	df2.columns = df2.columns.str.strip()

	# Merge the two exports on the company name and report how many rows result.
	df = pd.merge(left=df1, right=df2, left_on='Company Name', right_on='Company Name')
	print(len(df))

	cols = df1.columns
	cols = [x for x in cols if x != 'Company Name']

	# Keep only the columns that came out of the merge with both an "_x" (old)
	# and a "_y" (new) variant; the others cannot be compared.
	for i in cols:
		oldcol = i + "_x"
		newcol = i + "_y"
		if oldcol not in df or newcol not in df:
			print("Removing", i)
			cols = [x for x in cols if x != i]


	data = []

	df['what'] = ""

	# Collect every merged row in which at least one compared column went from
	# something other than "Yes" to exactly "Yes".
	for index, row in df.iterrows():
		colsChanged = []
		for i in cols:
			oldcol = i + "_x"
			newcol = i + "_y"
			if row[oldcol] != "Yes" and row[newcol] == "Yes":
				colsChanged.append(i)
		if colsChanged:
			print(row["Company Name"])
			# Record which columns changed so the output explains the match.
			row['what'] = colsChanged
			data.append(row)

	# Passing the columns explicitly keeps the schema when no row matched;
	# without it the frame has no 'Company Name' column and drop_duplicates
	# below raises KeyError.
	dfdup = pd.DataFrame(data, columns=df.columns)
	dfdup.to_csv('out-dup.csv')

	df = pd.DataFrame(data, columns=df.columns)
	df.drop_duplicates('Company Name', inplace = True)
	df.to_csv('out.csv')


	print("done!")