# OsmanMutlu/getentities.py

## getentities.py
import pandas as pd
import re
import codecs
from glob import glob
from pynlpl.formats import folia

# Every adjudicated document whose file name starts with "http".
files = glob("alladjudicated/http*")

# One row per document; "text" and "places" start out as empty strings and
# are filled in later, row by row.
all_df = pd.DataFrame({"filename": files}).assign(text="", places="")

def prep(row):
    """Fill in the text and the place names of one FoLiA document row.

    Loads the FoLiA document named by ``row.filename``, stores its full text
    in ``row.text`` and collects every entity whose class is ``"place"``.

    Args:
        row: A row of ``all_df`` (as handed over by ``DataFrame.apply`` with
            ``axis=1``) carrying ``filename``, ``text`` and ``places`` fields.

    Returns:
        The same row, with ``text`` set to the document text and ``places``
        set to a list holding one space-joined string per place entity.
    """
    # Generic words that are left out of a place name.
    generic_words = ("village", "district")

    doc = folia.Document(file=row.filename)
    row.text = doc.text()

    places = []
    for sentence in doc.sentences():
        for layer in sentence.select(folia.EntitiesLayer):
            for entity in layer.select(folia.Entity):
                if entity.cls != "place":
                    continue
                # The previous test (!= "village" or != "district") held for
                # every word, so nothing was ever dropped; a word must differ
                # from BOTH generic words to be kept.
                words = [word.text() for word in entity.wrefs()]
                places.append(
                    " ".join(w for w in words if w not in generic_words)
                )

    row.places = places
    return row

# Run prep once per row (one document each) to fill in "text" and "places".
all_df = all_df.apply(prep, axis="columns")
	import pandas as pd
	import re
	import codecs
	from glob import glob
	from pynlpl.formats import folia

	# Every adjudicated document whose file name starts with "http".
	files = glob("alladjudicated/http*")

	# One row per document; "text" and "places" start out as empty strings and
	# are filled in later, row by row.
	all_df = pd.DataFrame({"filename": files}).assign(text="", places="")

	def prep(row):
		"""Fill in the text and the place names of one FoLiA document row.

		Loads the FoLiA document named by ``row.filename``, stores its full
		text in ``row.text`` and collects every entity whose class is
		``"place"``.

		Args:
			row: A row of ``all_df`` (as handed over by ``DataFrame.apply``
				with ``axis=1``) carrying ``filename``, ``text`` and
				``places`` fields.

		Returns:
			The same row, with ``text`` set to the document text and
			``places`` set to a list holding one space-joined string per
			place entity.
		"""
		# Generic words that are left out of a place name.
		generic_words = ("village", "district")

		doc = folia.Document(file=row.filename)
		row.text = doc.text()

		places = []
		for sentence in doc.sentences():
			for layer in sentence.select(folia.EntitiesLayer):
				for entity in layer.select(folia.Entity):
					if entity.cls != "place":
						continue
					# The previous test (!= "village" or != "district") held
					# for every word, so nothing was ever dropped; a word must
					# differ from BOTH generic words to be kept.
					words = [word.text() for word in entity.wrefs()]
					places.append(
						" ".join(w for w in words if w not in generic_words)
					)

		row.places = places
		return row

	# Run prep once per row (one document each) to fill in "text" and "places".
	all_df = all_df.apply(prep, axis="columns")