Last active
November 14, 2018 07:05
-
-
Save mfmakahiya/fb69ad447f4d520cfe1bb992eddb096f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if __name__ == "__main__":
    # Script entry point: run Empath over every ``.txt`` file found directly
    # inside each folder of ``source_folder_path_list`` and write one row of
    # normalized category scores per file to a CSV indexed by
    # (Reviewer, Text file).
    logging.getLogger().setLevel(logging.INFO)

    # Build the lexicon once and reuse it for every file; constructing it per
    # file is loop-invariant work.
    lexicon = Empath()

    # Analyze a throwaway sentence only to learn the category names, which
    # become the columns of the result table.
    probe = lexicon.analyze("the quick brown fox jumps over the lazy dog", normalize=True)
    col_names = list(pd.Series(probe, name='KeyValue').keys())

    # Collect one plain dict per analyzed file and build the DataFrame once at
    # the end. Growing a DataFrame row by row with ``DataFrame.append`` is
    # quadratic, and that method no longer exists in pandas >= 2.0.
    records = []
    for folder in source_folder_path_list:
        # The reviewer is the name of the folder holding the text files.
        # Derived from the folder itself rather than from a fixed position in
        # the "/"-split full path, so it works for any directory depth and
        # path separator. normpath strips a trailing separator first.
        reviewer = os.path.basename(os.path.normpath(folder))

        txt_list = [name for name in os.listdir(folder) if name.endswith(".txt")]
        for txt_i in txt_list:
            txt_full_path = os.path.join(folder, txt_i)
            try:
                # ``with`` guarantees the handle is closed even if reading fails.
                with open(txt_full_path, 'r') as txt_file:
                    lines = txt_file.readlines()
                result = lexicon.analyze(lines, normalize=True)
            except Exception:
                # Best-effort batch job: record the failure (with traceback)
                # and move on to the next file. ``Exception`` rather than a
                # bare ``except`` so Ctrl-C still interrupts the run.
                logging.exception("%s failed to open or analyze", txt_i)
                continue

            if not result:
                # NOTE(review): presumably Empath yields no scores for a file
                # with no tokens - confirm. Such a row would be all-NaN and
                # removed by dropna() below anyway, so skip it explicitly.
                logging.info("%s produced no scores, skipped", txt_i)
                continue

            record = {'Reviewer': reviewer, 'Text file': txt_i}
            record.update(result)
            records.append(record)
            logging.info("%s successfully analyzed", txt_i)

    # Clean the data frame: fixed column order, (Reviewer, Text file) as the
    # index, and no rows with missing category scores.
    df = pd.DataFrame(records, columns=['Reviewer', 'Text file'] + col_names)
    df = df.set_index(['Reviewer', 'Text file'])
    df = df.dropna()

    # Make sure the target directory exists so the write cannot fail on a
    # fresh checkout.
    output_path = './data/output/Empath-on-movie-reviews_results.csv'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df.to_csv(output_path, sep=',', encoding='utf-8')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment