Skip to content

Instantly share code, notes, and snippets.

@hkraemer
Last active February 12, 2018 10:14
Show Gist options
  • Save hkraemer/90d6c8478eef5b28b99471ed02499b5e to your computer and use it in GitHub Desktop.
Save hkraemer/90d6c8478eef5b28b99471ed02499b5e to your computer and use it in GitHub Desktop.
Versuch JP zu verstehen...
### Exercise 4, part 2
# Tabulate the word counts of one text, label every word by how often it
# occurs, then report the mean word length per frequency class.
# NOTE(review): assumes `hamlets` is a DataFrame whose rows unpack to
# (language, text) and that `count_words_fast` returns a word -> count
# mapping; both are defined outside this snippet - confirm.
language, text = hamlets.iloc[0]
counted_text = count_words_fast(text)

# One row per distinct word together with its number of occurrences.
data = pd.DataFrame({
    "word": list(counted_text.keys()),
    "count": list(counted_text.values()),
})
data["length"] = data["word"].apply(len)

# Order matters: the "unique" rule (count == 1) deliberately overrides the
# "infrequent" label that the `<= 10` rule has just given the same rows.
data.loc[data["count"] > 10, "frequency"] = "frequent"
data.loc[data["count"] <= 10, "frequency"] = "infrequent"
data.loc[data["count"] == 1, "frequency"] = "unique"

# groupby(...).mean() yields ONE value per frequency class, indexed by the
# class label - not one value per word.
mean_length_by_frequency = data.groupby(by="frequency")["length"].mean()

# Build the summary with one row per frequency class. The scalar `language`
# is broadcast over the Series' index; pairing the Series with a per-word
# list (as before) raises ValueError because the lengths differ.
sub_data = pd.DataFrame({
    "language": language,
    "mean_word_length": mean_length_by_frequency,
})
# Take the labels from the grouped result so they always line up with the
# means, even when a class is absent from this text.
sub_data["frequency"] = list(mean_length_by_frequency.index)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment