@michelkana
Created July 26, 2019 15:08
# Longest word length in characters and in UTF-8 bytes
max_word_len = df.yb.str.len().max()
max_word_len_utf8 = df.yb_utf8.str.len().max()
# Number of distinct word-type labels and total number of words
nb_labels = len(df.word_type.unique())
nb_words = df.shape[0]
print("Number of words: ", nb_words)
print("Number of labels: ", nb_labels)
print("Max word length: {} characters and {} bytes".format(max_word_len, max_word_len_utf8))
@pancodia

pancodia commented Nov 8, 2021

I am following this article. When I execute model_lstm.fit to train the LSTM model, the following error occurred:

ValueError: logits and labels must have the same shape ((None, 10) vs (None, 12))

While debugging, I found:

In [89]: Y_train.shape
Out[89]: (2869, 12)

In [90]: nb_labels
Out[90]: 10

In [91]: df.word_type.max()
Out[91]: 11

In [92]: df.groupby('word_type').count().iloc[:, 0]
Out[92]: 
word_type
0     1901
1     1420
2      141
3       14
4       36
5       48
6       10
7       10
8        1
11       6
Name: en, dtype: int64

The training labels are created by Y = to_categorical(Y), which converts the integer labels to one-hot encoding. Because the maximum label index in the input dataset is 11, the one-hot encoding has dimension 12. However, the dataset contains only 10 unique label indices, so nb_labels is 10.
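A minimal sketch of why the shapes disagree: to_categorical sizes its output from the largest index it sees (like np.eye(max_label + 1)), not from the number of distinct labels present. The helper below is a toy stand-in for keras.utils.to_categorical, and the label list is hypothetical data mirroring the gaps above (no 9 or 10):

```python
import numpy as np

def to_categorical_sketch(y, num_classes=None):
    """Mimic keras.utils.to_categorical: one-hot encode integer labels.
    The output width defaults to max(y) + 1, not the number of
    distinct labels actually present."""
    y = np.asarray(y, dtype=int)
    if num_classes is None:
        num_classes = y.max() + 1
    return np.eye(num_classes)[y]

# Labels 9 and 10 are missing, as in the dataset above (toy data)
labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 11]
Y = to_categorical_sketch(labels)
print(Y.shape)            # (10, 12): 12 one-hot columns
print(len(set(labels)))   # 10: what nb_labels reports, hence the mismatch
```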

In order to resolve the issue, should we instead calculate nb_labels as follows?

nb_labels = df.word_type.max() - df.word_type.min() + 1
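As a quick check of that formula on a hypothetical word_type column with the same gaps as above:

```python
import pandas as pd

# Hypothetical word_type column: labels 9 and 10 are absent, minimum is 0
df = pd.DataFrame({"word_type": [0, 1, 2, 3, 4, 5, 6, 7, 8, 11]})

nb_unique = len(df.word_type.unique())                   # 10: misses the gap labels
nb_labels = df.word_type.max() - df.word_type.min() + 1  # 12: matches the one-hot width
print(nb_unique, nb_labels)
```

Note that to_categorical always sizes its output as max + 1, so df.word_type.max() + 1 is the safer form if the minimum label were ever nonzero; alternatively, passing num_classes to to_categorical pins the width explicitly.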

@pancodia

pancodia commented Nov 8, 2021

Further, may I ask how one usually handles the situation where the training data is missing some label categories that could still appear in production?

@michelkana
Author

@pancodia thanks for getting back to me. Sorry for the late reply; I was traveling. Did you find a fix? If yes, can you share it, or do you still need help?
