# aniruddha27/nlp_ie_df.py

## nlp_ie_df.py
# Build a dataframe with one row per Indian ("IND") speech found under the
# UN General Debate corpus folder.
#
# The imports live here so this section runs on its own; re-importing a module
# that is already loaded is harmless.
import glob
import os

import pandas as pd


def _parse_speech_path(path):
    """Return ``(country, session, year)`` parsed from a speech file path.

    Only the file name is inspected, so the result does not depend on the
    path separator or on underscores in directory names. The name is split on
    ``_`` after dropping everything from the first ``.``; e.g.
    ``IND_25_1970.txt`` yields ``('IND', '25', '1970')``.

    Raises:
        IndexError: if the file name contains no underscore-separated parts
            beyond the first.
    """
    stem = os.path.basename(path).split('.')[0]
    parts = stem.split('_')
    return parts[0], parts[-2], parts[-1]


# Folder path
# glob returns entries in arbitrary order; sorting keeps the rows reproducible.
folders = sorted(glob.glob('./UNGD/UNGDC 1970-2018/Converted sessions/Session*'))

# Read speeches by India
# Rows are collected in a list and turned into a dataframe once, instead of
# growing the dataframe one cell at a time.
records = []
for folder in folders:

    matches = sorted(glob.glob(os.path.join(folder, 'IND*.txt')))
    if not matches:
        # No Indian speech in this folder: skip it rather than crash.
        continue
    speech_path = matches[0]

    with open(speech_path, encoding='utf8') as f:
        text = f.read()

    country, session, year = _parse_speech_path(speech_path)
    records.append({
        'Country': country,
        'Speech': text,
        'Session': session,
        'Year': year,
    })

# Dataframe
# Columns are given as a list (not a set) so their order is well defined.
df = pd.DataFrame(records, columns=['Country', 'Speech', 'Session', 'Year'])

df.head()
	# Build a dataframe with one row per Indian ("IND") speech found under the
# UN General Debate corpus folder.
#
# The imports live here so this section runs on its own; re-importing a module
# that is already loaded is harmless.
import glob
import os

import pandas as pd


def _parse_speech_path(path):
    """Return ``(country, session, year)`` parsed from a speech file path.

    Only the file name is inspected, so the result does not depend on the
    path separator or on underscores in directory names. The name is split on
    ``_`` after dropping everything from the first ``.``; e.g.
    ``IND_25_1970.txt`` yields ``('IND', '25', '1970')``.

    Raises:
        IndexError: if the file name contains no underscore-separated parts
            beyond the first.
    """
    stem = os.path.basename(path).split('.')[0]
    parts = stem.split('_')
    return parts[0], parts[-2], parts[-1]


# Folder path
# glob returns entries in arbitrary order; sorting keeps the rows reproducible.
folders = sorted(glob.glob('./UNGD/UNGDC 1970-2018/Converted sessions/Session*'))

# Read speeches by India
# Rows are collected in a list and turned into a dataframe once, instead of
# growing the dataframe one cell at a time.
records = []
for folder in folders:

    matches = sorted(glob.glob(os.path.join(folder, 'IND*.txt')))
    if not matches:
        # No Indian speech in this folder: skip it rather than crash.
        continue
    speech_path = matches[0]

    with open(speech_path, encoding='utf8') as f:
        text = f.read()

    country, session, year = _parse_speech_path(speech_path)
    records.append({
        'Country': country,
        'Speech': text,
        'Session': session,
        'Year': year,
    })

# Dataframe
# Columns are given as a list (not a set) so their order is well defined.
df = pd.DataFrame(records, columns=['Country', 'Speech', 'Session', 'Year'])

df.head()