saketkc/journal_overlap.py

## journal_overlap.py
import pandas as pd
from operator import itemgetter
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
def _read_names(path, **read_csv_kwargs):
    """Load a header-less list of names into a DataFrame.

    Args:
        path: File handed to ``pd.read_csv``.
        **read_csv_kwargs: Extra keyword arguments forwarded to
            ``pd.read_csv`` (used for the tab separator of the Beall list).

    Returns:
        The DataFrame produced by ``pd.read_csv`` with the column labelled
        ``names``.
    """
    return pd.read_csv(path, header=None, names=['names'], **read_csv_kwargs)


def _normalise(frame):
    """Return the ``names`` column with spaces removed and lower-cased."""
    return frame.names.str.replace(' ', '').str.lower()


def _write_matches(path, matches):
    """Write one ``name,score`` line per entry of *matches* to *path*.

    Falsy entries (for example ``None``) are skipped. The fields are written
    verbatim, without CSV quoting.
    """
    with open(path, 'w') as fh:
        fh.writelines(
            "{},{}\n".format(entry[0], entry[1]) for entry in matches if entry
        )


# All inputs are read up front, so a missing file aborts the run before any
# matching is done or any output file is created.
beall_df = _read_names('Beall_list.txt', sep='\t')
j1_df, j2_df, j3_df, j4_df, j5_df = [
    _read_names(path)
    for path in ('J1.csv', 'J2.csv', 'J3.csv', 'J4.csv', 'J5.csv')
]

# The Beall names stay a Series (they are only iterated); the journal names
# become plain lists because they are passed as the choices to extractOne.
beall_df_names = _normalise(beall_df)
j1_df_names, j2_df_names, j3_df_names, j4_df_names, j5_df_names = [
    _normalise(frame).tolist()
    for frame in (j1_df, j2_df, j3_df, j4_df, j5_df)
]

# Every result list is seeded with a placeholder, so each output file starts
# with the line "xxxxx,0".
# NOTE(review): the purpose of this sentinel row is not evident from the code.
j1_matches, j2_matches, j3_matches, j4_matches, j5_matches = [
    [('xxxxx', 0)] for _ in range(5)
]

_candidate_pools = (
    j1_df_names, j2_df_names, j3_df_names, j4_df_names, j5_df_names,
)
_match_lists = (j1_matches, j2_matches, j3_matches, j4_matches, j5_matches)

# For every Beall name, record the best fuzzy match in each journal list
# (lists are visited in J1..J5 order for each name).
for name in beall_df_names:
    for candidates, matches in zip(_candidate_pools, _match_lists):
        matches.append(process.extractOne(name, candidates))

_output_paths = (
    'j1_matches.csv',
    'j2_matches.csv',
    'j3_matches.csv',
    'j4_matches.csv',
    'j5_matches.csv',
)
for out_path, matches in zip(_output_paths, _match_lists):
    _write_matches(out_path, matches)
"""
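To order each output file by match score (numeric, ascending sort on the second comma-separated field), run: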

$ sort  --field-separator=',' -k2 -n j1_matches.csv
$ sort  --field-separator=',' -k2 -n j2_matches.csv
$ sort  --field-separator=',' -k2 -n j3_matches.csv
$ sort  --field-separator=',' -k2 -n j4_matches.csv
$ sort  --field-separator=',' -k2 -n j5_matches.csv


"""
	import pandas as pd
	from operator import itemgetter
	from fuzzywuzzy import fuzz
	from fuzzywuzzy import process
	# Load the Beall list (read with a tab separator) and the five journal
	# lists. None of the files has a header row; the column is named 'names'.
	beall_df = pd.read_csv('Beall_list.txt', header=None, sep='\t', names=['names'])
	j1_df = pd.read_csv('J1.csv', header=None, names=['names'])
	j2_df = pd.read_csv('J2.csv', header=None, names=['names'])
	j3_df = pd.read_csv('J3.csv', header=None, names=['names'])
	j4_df = pd.read_csv('J4.csv', header=None, names=['names'])
	j5_df = pd.read_csv('J5.csv', header=None, names=['names'])

	# Normalise every name by removing spaces and lower-casing. The Beall
	# names stay a Series (only iterated); the journal names become lists
	# because they are passed as the choices to process.extractOne.
	beall_df_names = beall_df.names.str.replace(' ','').str.lower()
	j1_df_names = j1_df.names.str.replace(' ','').str.lower().tolist()
	j2_df_names = j2_df.names.str.replace(' ','').str.lower().tolist()
	j3_df_names = j3_df.names.str.replace(' ','').str.lower().tolist()
	j4_df_names = j4_df.names.str.replace(' ','').str.lower().tolist()
	j5_df_names = j5_df.names.str.replace(' ','').str.lower().tolist()


	# Each result list is seeded with a placeholder, so every output file
	# starts with the line "xxxxx,0".
	# NOTE(review): the purpose of this sentinel row is not evident from the code.
	j1_matches = [('xxxxx',0)]
	j2_matches = [('xxxxx',0)]
	j3_matches = [('xxxxx',0)]
	j4_matches = [('xxxxx',0)]
	j5_matches = [('xxxxx',0)]


	# For every Beall name, record the best fuzzy match in each journal list.
	for name in beall_df_names:
	    match = process.extractOne(name, j1_df_names)
	    j1_matches.append(match)

	    match = process.extractOne(name, j2_df_names)
	    j2_matches.append(match)

	    match = process.extractOne(name, j3_df_names)
	    j3_matches.append(match)

	    match = process.extractOne(name, j4_df_names)
	    j4_matches.append(match)

	    match = process.extractOne(name, j5_df_names)
	    j5_matches.append(match)

	# Write one "name,score" line per match; falsy entries are skipped.
	with open('j1_matches.csv', 'w') as fh:
	    for item in j1_matches:
	        if item:
	            fh.write("{},{}\n".format(item[0], item[1]))

	with open('j2_matches.csv', 'w') as fh:
	    for item in j2_matches:
	        if item:
	            fh.write("{},{}\n".format(item[0], item[1]))

	with open('j3_matches.csv', 'w') as fh:
	    for item in j3_matches:
	        if item:
	            fh.write("{},{}\n".format(item[0], item[1]))


	with open('j4_matches.csv', 'w') as fh:
	    for item in j4_matches:
	        if item:
	            fh.write("{},{}\n".format(item[0], item[1]))


	with open('j5_matches.csv', 'w') as fh:
	    for item in j5_matches:
	        if item:
	            fh.write("{},{}\n".format(item[0], item[1]))
	"""
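	To order each output file by match score (numeric, ascending sort on the second comma-separated field), run: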

	$ sort --field-separator=',' -k2 -n j1_matches.csv
	$ sort --field-separator=',' -k2 -n j2_matches.csv
	$ sort --field-separator=',' -k2 -n j3_matches.csv
	$ sort --field-separator=',' -k2 -n j4_matches.csv
	$ sort --field-separator=',' -k2 -n j5_matches.csv



	"""