Nithilaa Umasankar Nithilaa

## install_lexrank.py
pip install lexrank

## sumy_summarizer.py
# Wrap the raw `text` in a sumy document, tokenized as English.
parser = PlaintextParser.from_string(text, Tokenizer("english"))

summarizer_lex = LexRankSummarizer()

# The second argument (2) is passed to the summarizer as the requested size.
summary = summarizer_lex(parser.document, 2)

# Emit every selected sentence on its own line (each followed by "\n").
lex_summary = "".join(str(sentence) + "\n" for sentence in summary)

## sumy_text.py
# Sample passage used as summarizer input: six sentences, one per line.
# The triple-quoted literal must be closed, otherwise everything after it
# is swallowed into the string and the snippet does not parse.
text = """America has changed dramatically during recent years.
Not only has the number of graduates in traditional engineering disciplines such as mechanical, civil, electrical, chemical, and aeronautical engineering declined, but in most of the premier American universities engineering curricula now concentrate on and encourage largely the study of engineering science.
As a result, there are declining offerings in engineering subjects dealing with infrastructure, the environment, and related issues, and greater concentration on high technology subjects, largely supporting increasingly complex scientific developments.
While the latter is important, it should not be at the expense of more traditional engineering.
Rapidly developing economies such as China and India, as well as other industrial countries in Europe and Asia, continue to encourage and advance the teaching of engineering.
Both China and India, respectively, graduate six and eight times as many traditional engineers as does the United States."""

## import_packages_sumy.py
import nltk
# Fetch NLTK's 'punkt' resource up front.
# NOTE(review): presumably required by the sumy Tokenizer used elsewhere — confirm.
nltk.download('punkt')

import sumy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

## install_sumy.py
pip install sumy

## sometimes.py
# Put the BERT and word-embedding augmenters into a `Sometimes` flow.
aug2 = naf.Sometimes(
    [aug_bert, aug_w2v],
    aug_p=0.5,
    pipeline_p=0.5,
)

# Augment `text` with n=10.
# Results look better than with the Sequential flow.
aug2.augment(text, n=10)

## sequential.py
# TODO: try Compose & Sometimes
# TODO: make offline augmentation + pseudo label from my best ensemble
# TODO: re-train

# Sample question used as augmentation input.
text = "What is your recommended book on Bayesian Statistics?"

# Put the BERT and word-embedding augmenters into a `Sequential` flow.
aug = naf.Sequential(
    [aug_bert, aug_w2v],
)

## word2vec.py
# Word-embedding augmenter.
# model_type may be one of: word2vec, glove or fasttext.
aug_w2v = naw.WordEmbsAug(
    model_type='glove',
    model_path='/content/glove.6B.300d.txt',
    action="substitute",
)

# Show the unmodified sentence under an "Original:" heading.
print("Original:", test_sentence, sep="\n")

## bert.py
# Settings forwarded to the contextual augmenter below.
TOPK = 20  # default=100
ACT = 'insert'  # alternative: "substitute"

aug_bert = naw.ContextualWordEmbsAug(
    model_path='distilbert-base-uncased',
    # device='cuda',
    action=ACT,
    top_k=TOPK,
)

# Show the unmodified sentence, then the heading for the augmented output.
print("Original:", test_sentence, sep="\n")
print("Augmented Text:")

## split.py
# Split augmenter; every option is spelled out explicitly.
aug = naw.SplitAug(
    name='Split_Aug',
    aug_min=1,
    aug_max=10,
    aug_p=0.3,
    min_char=4,
    stopwords=None,
    tokenizer=None,
    reverse_tokenizer=None,
    stopwords_regex=None,
    verbose=0,
)

test_sentence_aug = aug.augment(test_sentence)

# Print the original sentence followed by its augmented counterpart.
print(test_sentence, test_sentence_aug, sep="\n")
	# Wrap the raw `text` in a sumy document, tokenized as English.
	parser = PlaintextParser.from_string(text, Tokenizer("english"))

	summarizer_lex = LexRankSummarizer()

	# The second argument (2) is passed to the summarizer as the requested size.
	summary = summarizer_lex(parser.document, 2)

	# Emit every selected sentence on its own line (each followed by "\n").
	# A single join keeps the per-sentence work inside one expression, so no
	# nested loop body (and no nested indentation) is needed here.
	lex_summary = "".join(str(sentence) + "\n" for sentence in summary)
	# Sample passage used as summarizer input: six sentences, one per line.
	# The triple-quoted literal must be closed, otherwise everything after it
	# is swallowed into the string and the snippet does not parse.
	text = """America has changed dramatically during recent years.
	Not only has the number of graduates in traditional engineering disciplines such as mechanical, civil, electrical, chemical, and aeronautical engineering declined, but in most of the premier American universities engineering curricula now concentrate on and encourage largely the study of engineering science.
	As a result, there are declining offerings in engineering subjects dealing with infrastructure, the environment, and related issues, and greater concentration on high technology subjects, largely supporting increasingly complex scientific developments.
	While the latter is important, it should not be at the expense of more traditional engineering.
	Rapidly developing economies such as China and India, as well as other industrial countries in Europe and Asia, continue to encourage and advance the teaching of engineering.
	Both China and India, respectively, graduate six and eight times as many traditional engineers as does the United States."""
	import nltk
	# Fetch NLTK's 'punkt' resource up front.
	# NOTE(review): presumably required by the sumy Tokenizer used elsewhere — confirm.
	nltk.download('punkt')

	import sumy
	from sumy.parsers.plaintext import PlaintextParser
	from sumy.nlp.tokenizers import Tokenizer
	from sumy.summarizers.lex_rank import LexRankSummarizer
	# Put the BERT and word-embedding augmenters into a `Sometimes` flow.
	aug2 = naf.Sometimes(
	    [aug_bert, aug_w2v],
	    aug_p=0.5,
	    pipeline_p=0.5,
	)

	# Augment `text` with n=10.
	# Results look better than with the Sequential flow.
	aug2.augment(text, n=10)
	# TODO: try Compose & Sometimes
	# TODO: make offline augmentation + pseudo label from my best ensemble
	# TODO: re-train

	# Sample question used as augmentation input.
	text = "What is your recommended book on Bayesian Statistics?"

	# Put the BERT and word-embedding augmenters into a `Sequential` flow.
	aug = naf.Sequential(
	    [aug_bert, aug_w2v],
	)
	# Word-embedding augmenter.
	# model_type may be one of: word2vec, glove or fasttext.
	aug_w2v = naw.WordEmbsAug(
	    model_type='glove',
	    model_path='/content/glove.6B.300d.txt',
	    action="substitute",
	)

	# Show the unmodified sentence under an "Original:" heading.
	print("Original:", test_sentence, sep="\n")
	# Settings forwarded to the contextual augmenter below.
	TOPK = 20  # default=100
	ACT = 'insert'  # alternative: "substitute"

	aug_bert = naw.ContextualWordEmbsAug(
	    model_path='distilbert-base-uncased',
	    # device='cuda',
	    action=ACT,
	    top_k=TOPK,
	)

	# Show the unmodified sentence, then the heading for the augmented output.
	print("Original:", test_sentence, sep="\n")
	print("Augmented Text:")
	# Split augmenter; every option is spelled out explicitly.
	aug = naw.SplitAug(
	    name='Split_Aug',
	    aug_min=1,
	    aug_max=10,
	    aug_p=0.3,
	    min_char=4,
	    stopwords=None,
	    tokenizer=None,
	    reverse_tokenizer=None,
	    stopwords_regex=None,
	    verbose=0,
	)

	test_sentence_aug = aug.augment(test_sentence)

	# Print the original sentence followed by its augmented counterpart.
	print(test_sentence, test_sentence_aug, sep="\n")