Skip to content

Instantly share code, notes, and snippets.

@ltphen

ltphen/import Secret

Last active September 10, 2022 02:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ltphen/80a1908700968c726dee9e8070f934f3 to your computer and use it in GitHub Desktop.
Save ltphen/80a1908700968c726dee9e8070f934f3 to your computer and use it in GitHub Desktop.
import os
import csv
# Root directory of the dataset; the loop further down reads the
# "enron<N>/ham" and "enron<N>/spam" folders beneath it.
path = "/media/ltphen/Ulife/LTPhen von Ulife/tools/dataset/emails"
# Accumulates one (cleaned email text, is_spam) tuple per email file; the
# whole list is handed to write_in_csv at the end of the script.
emailsContent = []
def read_file(path):
    """Return the full text of the file at *path*.

    The file is decoded as ISO-8859-1, which maps every possible byte to a
    character, so decoding never fails regardless of the file's real
    encoding.

    Args:
        path: Location of the file to read.

    Returns:
        The file's content as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() raises; this
    # function is called once per email, so leaked handles would pile up.
    with open(path, "r", encoding="ISO-8859-1") as f:
        return f.read()
def write_in_csv(content):
    """Write *content* to ``result.csv`` in the current working directory.

    The file is overwritten on every call. A ``content,spam`` header row is
    written first, followed by one row per item.

    Args:
        content: Iterable of rows; each row is an iterable of cell values
            (the script passes ``(text, is_spam)`` tuples).

    Raises:
        OSError: If ``result.csv`` cannot be opened or written.
    """
    # newline="" is required by the csv module so that it controls line
    # endings itself (otherwise Windows gets "\r\r\n"). UTF-8 is set
    # explicitly so non-ASCII characters never depend on the locale default.
    with open("result.csv", "w", newline="", encoding="utf-8") as out:
        csv_file = csv.writer(out)
        csv_file.writerow(["content", "spam"])
        csv_file.writerows(content)
# Collect every email under <path>/enron1 .. enron6, in both the "ham" and
# "spam" folders, then dump them all into result.csv.
for i in range(1, 7):
    for label in ["ham", "spam"]:
        pathName = os.path.join(path, "enron" + str(i), label)
        # os.walk descends into subdirectories, so each file must be joined
        # with the directory it was actually found in (root), not with the
        # top-level pathName.
        for root, _, files in os.walk(pathName):
            for file in files:
                filePath = os.path.join(root, file)
                content = read_file(filePath)
                # Strip the "Subject:" marker and flatten the mail onto one
                # line. NOTE: newlines are removed without inserting a
                # space, so words on either side of a line break are joined.
                cleaned = content.replace("Subject:", "").replace("\n", "")
                # The second column is True for spam and False for ham.
                emailsContent.append((cleaned, label == "spam"))
write_in_csv(emailsContent)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment