ABHISHEK SHARMA abhishek-shrm

## keyword-extraction-textrank-10.py
# Build the validation-set dataframe from the abstracts and keywords dicts.
# A list of two dicts gives two rows (one per dict) whose columns are the
# dict keys, so transpose to get one row per key and one column per dict.
df_val = pd.DataFrame.from_dict([abstracts, keys])
df_val = df_val.T
df_val.columns = ['abstract', 'keys']
# Turn the index (the dict keys) into a regular column.
df_val = df_val.reset_index()
print('Shape=>', df_val.shape)
df_val.head()

## keyword-extraction-textrank-9.py
# Function for pre-processing keywords
def preprocess_keys(text: str) -> str:
  """Normalize a keyword string.

  Lower-cases the text, replaces every run of whitespace and/or hyphens
  with a single space, and strips leading/trailing whitespace.

  Args:
    text: Raw keyword text.

  Returns:
    The normalized keyword text.
  """
  # Case Normalization
  text = text.lower()
  # Removing hyphen and whitespaces.
  # Raw string so that `\s` reaches the regex engine as written instead of
  # being treated as an (invalid) string escape sequence.
  text = re.sub(r'(\s|-)+', ' ', text)
  # Removing any leading and trailing spaces
  return text.strip()

## keyword-extraction-textrank-8.py
# Turn each entry's raw key text into a list of keys by splitting on ';'
keys = dict.fromkeys(index, None)

for doc_id, raw_keys in keys_text.items():
  keys[doc_id] = raw_keys.split(';')

# Show the first four entries as a sample
dict(list(keys.items())[:4])

## keyword-extraction-textrank-7.py
# Extracting text from all the keys files
keys_text = dict.fromkeys(index, None)

for key_file in keys_list:
  # Context manager closes each file as soon as it has been read, instead
  # of leaving the handle open until garbage collection.
  with open(key_file, encoding='utf-8') as handle:
    file = handle.read()
  # The dict key is the integer parsed from the file's base name
  # (directory and extension removed).
  keys_text[int(key_file.split('.')[0].split('/')[-1])] = file

# Printing content of five files
dict(list(keys_text.items())[:5])

## keyword-extraction-textrank-6.py
# Collect the paths of all files containing uncontrolled keys
keys_list = glob.glob('/content/validation/*.uncontr')
# Preview the first five paths
keys_list[:5]

## keyword-extraction-textrank-5.py
# Getting abstracts
abstracts = dict.fromkeys(index, None)

for text_file in abs_list:
  # Context manager closes each file as soon as it has been read, instead
  # of leaving the handle open until garbage collection.
  with open(text_file, encoding='utf8') as handle:
    file = handle.read()
  # The dict key is the integer parsed from the file's base name
  # (directory and extension removed).
  abstracts[int(text_file.split('.')[0].split('/')[-1])] = file

# Printing five abstracts
dict(list(abstracts.items())[:5])

## keyword-extraction-textrank-4.py
# Parse the integer base name of every abstract file, then sort ascending
index = [int(path.split('.')[0].split('/')[-1]) for path in abs_list]
index.sort()
# Preview the first five values
index[:5]

## keyword-extraction-textrank-3.py
# Number of entries in abs_list
len(abs_list)

## keyword-extraction-textrank-2.py
# Collect the paths of all files containing abstracts
abs_list = glob.glob('/content/validation/*.abstr')
# Preview the first five paths
abs_list[:5]

## keyword-extraction-textrank-1.py
# For working with Regular Expressions
import re
# For getting pathnames
import glob
# For numerical computing
import numpy as np
# For data handling
import pandas as pd
	# Build the validation-set dataframe from the abstracts and keywords dicts.
	# A list of two dicts gives two rows (one per dict) whose columns are the
	# dict keys, so transpose to get one row per key and one column per dict.
	df_val = pd.DataFrame.from_dict([abstracts, keys])
	df_val = df_val.T
	df_val.columns = ['abstract', 'keys']
	# Turn the index (the dict keys) into a regular column.
	df_val = df_val.reset_index()
	print('Shape=>', df_val.shape)
	df_val.head()
	# Function for pre-processing keywords
	def preprocess_keys(text: str) -> str:
		"""Normalize a keyword string.

		Lower-cases the text, replaces every run of whitespace and/or hyphens
		with a single space, and strips leading/trailing whitespace.

		Args:
			text: Raw keyword text.

		Returns:
			The normalized keyword text.
		"""
		# Case Normalization
		text = text.lower()
		# Removing hyphen and whitespaces.
		# The `|` must stay unescaped so it alternates between whitespace and
		# hyphen; an escaped `\|` would only match a literal pipe character.
		# Raw string so that `\s` reaches the regex engine as written.
		text = re.sub(r'(\s|-)+', ' ', text)
		# Removing any leading and trailing spaces
		return text.strip()
	# Getting list of keys
	keys = dict.fromkeys(index, None)

	# Split each entry's raw key text on ';' to get its individual keys.
	for i in keys_text:
		keys[i] = keys_text[i].split(';')

	# Printing sample keywords
	dict(list(keys.items())[:4])
	# Extracting text from all the keys files
	keys_text = dict.fromkeys(index, None)

	for key_file in keys_list:
		# Context manager closes each file as soon as it has been read,
		# instead of leaving the handle open until garbage collection.
		with open(key_file, encoding='utf-8') as handle:
			file = handle.read()
		# The dict key is the integer parsed from the file's base name
		# (directory and extension removed).
		keys_text[int(key_file.split('.')[0].split('/')[-1])] = file

	# Printing content of five files
	dict(list(keys_text.items())[:5])
	# Collect the paths of all files containing uncontrolled keys
	keys_list = glob.glob('/content/validation/*.uncontr')
	# Preview the first five paths
	keys_list[:5]
	# Getting abstracts
	abstracts = dict.fromkeys(index, None)

	for text_file in abs_list:
		# Context manager closes each file as soon as it has been read,
		# instead of leaving the handle open until garbage collection.
		with open(text_file, encoding='utf8') as handle:
			file = handle.read()
		# The dict key is the integer parsed from the file's base name
		# (directory and extension removed).
		abstracts[int(text_file.split('.')[0].split('/')[-1])] = file

	# Printing five abstracts
	dict(list(abstracts.items())[:5])
	# Parse the integer base name of every abstract file, then sort ascending
	index = [int(path.split('.')[0].split('/')[-1]) for path in abs_list]
	index.sort()
	# Preview the first five values
	index[:5]
	# Collect the paths of all files containing abstracts
	abs_list = glob.glob('/content/validation/*.abstr')
	# Preview the first five paths
	abs_list[:5]
	# For working with Regular Expressions
	import re
	# For getting pathnames
	import glob
	# For numerical computing
	import numpy as np
	# For data handling
	import pandas as pd