Wapiti08/nlp test

## nlp test
'''
    Verify the typical (average) ratio that fuzzy matching yields when a
    technique name is matched against the tokens of its description text.

'''

from fuzzywuzzy import process
import textacy
from textacy.spacier.doc_extensions import to_tokenized_text, to_tagged_text
import spacy
from textacy.spacier.utils import merge_spans


# Description text that gets parsed, and the technique name that is fuzzy-matched
# against it. The text is kept verbatim on purpose.
sentence = (
    "Adversaries may circumvent mechanisms designed to control elevate "
    "privileges to gain higher-level permissions. "
    "Most modern systems contain native elevation control mechanisms that "
    "are intended to limit privileges that a user can perform on a machine. "
    "Authorization has to be granted to specific users in order to perform "
    "tasks that can be considered of higher risk. "
    "An adversary can perform several methods to take advantage of built-in "
    "control mechanisms in order to escalate privileges on a system. "
)
technique = "Abuse Elevation Control Mechanism"

# Second description / technique pair.
# NOTE: "setsuid" and the stray ". ." are part of the input text; the output
# recorded further down (e.g. "the setsuid or setgid bits") depends on them.
sentence1 = (
    "An adversary may perform shell escapes or exploit vulnerabilities in an "
    "application with the setsuid or setgid bits to get code running in a "
    "different user’s context. "
    "On Linux or macOS, when the setuid or setgid bits are set for an "
    "application, the application will run with the privileges of the owning "
    "user or group respectively. . "
    "Normally an application is run in the current user’s context, regardless "
    "of which user or group owns the application. "
    "However, there are instances where programs need to be executed in an "
    "elevated context to function properly, but the user running them doesn’t "
    "need the elevated privileges. "
)
technique1 = "Setuid and Setgid "

# Language pipeline used to parse both descriptions.
spacy_lang = textacy.load_spacy_lang("en_core_web_sm")

# ========== Test for sentence ============
# Parse the description and tokenize it *before* the merge step below, so the
# token-level comparison presumably runs on the unmerged tokens.
docx_textacy = spacy_lang(sentence)
tokens = to_tokenized_text(docx_textacy)

# Gather entities and noun chunks, filter out overlapping spans, then merge
# each remaining span into one token of the doc.
spans = spacy.util.filter_spans([*docx_textacy.ents, *docx_textacy.noun_chunks])
merge_spans(spans, docx_textacy)

# Best fuzzy match for the technique among the individual tokens
# (flattened across the per-sentence token groups).
flat_tokens = []
for sen_tokens in tokens:
    flat_tokens.extend(sen_tokens)
best_token = process.extractOne(technique, flat_tokens)
print(best_token)
print('=====================')
# Best fuzzy match among the merged entity / noun-chunk spans.
# NOTE(review): the choices here are spaCy Span objects rather than plain
# strings -- confirm fuzzywuzzy's default processor handles them as intended.
best_span = process.extractOne(technique, list(spans))
print(best_span)

'''
Abuse Elevation Control Mechanism:

('control', 90)
=====================
(native elevation control mechanisms, 88)
'''


# ========== Test for sentence1 ===========
# Parse the description and tokenize it *before* the merge step below, so the
# token-level comparison presumably runs on the unmerged tokens.
docx_textacy1 = spacy_lang(sentence1)
tokens1 = to_tokenized_text(docx_textacy1)

# Gather entities and noun chunks, filter out overlapping spans, then merge
# each remaining span into one token of the doc.
spans1 = spacy.util.filter_spans([*docx_textacy1.ents, *docx_textacy1.noun_chunks])
merge_spans(spans1, docx_textacy1)

# Best fuzzy match for the technique among the individual tokens
# (flattened across the per-sentence token groups).
flat_tokens1 = []
for sen_tokens in tokens1:
    flat_tokens1.extend(sen_tokens)
best_token1 = process.extractOne(technique1, flat_tokens1)
print(best_token1)
print('=====================')
# Best fuzzy match among the merged entity / noun-chunk spans.
# NOTE(review): the choices here are spaCy Span objects rather than plain
# strings -- confirm fuzzywuzzy's default processor handles them as intended.
best_span1 = process.extractOne(technique1, list(spans1))
print(best_span1)

'''
Setuid and Setgid:

('setgid', 90)
=====================
(the setsuid or setgid bits, 86)
'''
	'''
	Verify the typical (average) ratio that fuzzy matching yields when a
	technique name is matched against the tokens of its description text.

	'''

	from fuzzywuzzy import process
	import textacy
	from textacy.spacier.doc_extensions import to_tokenized_text, to_tagged_text
	import spacy
	from textacy.spacier.utils import merge_spans


	# Description text that gets parsed, and the technique name that is fuzzy-matched
	# against it. The text is kept verbatim on purpose.
	sentence = (
		"Adversaries may circumvent mechanisms designed to control elevate "
		"privileges to gain higher-level permissions. "
		"Most modern systems contain native elevation control mechanisms that "
		"are intended to limit privileges that a user can perform on a machine. "
		"Authorization has to be granted to specific users in order to perform "
		"tasks that can be considered of higher risk. "
		"An adversary can perform several methods to take advantage of built-in "
		"control mechanisms in order to escalate privileges on a system. "
	)
	technique = "Abuse Elevation Control Mechanism"

	# Second description / technique pair.
	# NOTE: "setsuid" and the stray ". ." are part of the input text; the output
	# recorded further down (e.g. "the setsuid or setgid bits") depends on them.
	sentence1 = (
		"An adversary may perform shell escapes or exploit vulnerabilities in an "
		"application with the setsuid or setgid bits to get code running in a "
		"different user’s context. "
		"On Linux or macOS, when the setuid or setgid bits are set for an "
		"application, the application will run with the privileges of the owning "
		"user or group respectively. . "
		"Normally an application is run in the current user’s context, regardless "
		"of which user or group owns the application. "
		"However, there are instances where programs need to be executed in an "
		"elevated context to function properly, but the user running them doesn’t "
		"need the elevated privileges. "
	)
	technique1 = "Setuid and Setgid "

	# Language pipeline used to parse both descriptions.
	spacy_lang = textacy.load_spacy_lang("en_core_web_sm")

	# ========== Test for sentence ============
	# Parse the description and tokenize it *before* the merge step below, so the
	# token-level comparison presumably runs on the unmerged tokens.
	docx_textacy = spacy_lang(sentence)
	tokens = to_tokenized_text(docx_textacy)

	# Gather entities and noun chunks, filter out overlapping spans, then merge
	# each remaining span into one token of the doc.
	spans = spacy.util.filter_spans([*docx_textacy.ents, *docx_textacy.noun_chunks])
	merge_spans(spans, docx_textacy)

	# Best fuzzy match for the technique among the individual tokens
	# (flattened across the per-sentence token groups).
	flat_tokens = []
	for sen_tokens in tokens:
		flat_tokens.extend(sen_tokens)
	best_token = process.extractOne(technique, flat_tokens)
	print(best_token)
	print('=====================')
	# Best fuzzy match among the merged entity / noun-chunk spans.
	# NOTE(review): the choices here are spaCy Span objects rather than plain
	# strings -- confirm fuzzywuzzy's default processor handles them as intended.
	best_span = process.extractOne(technique, list(spans))
	print(best_span)

	'''
	Abuse Elevation Control Mechanism:

	('control', 90)
	=====================
	(native elevation control mechanisms, 88)
	'''


	# ========== Test for sentence1 ===========
	# Parse the description and tokenize it *before* the merge step below, so the
	# token-level comparison presumably runs on the unmerged tokens.
	docx_textacy1 = spacy_lang(sentence1)
	tokens1 = to_tokenized_text(docx_textacy1)

	# Gather entities and noun chunks, filter out overlapping spans, then merge
	# each remaining span into one token of the doc.
	spans1 = spacy.util.filter_spans([*docx_textacy1.ents, *docx_textacy1.noun_chunks])
	merge_spans(spans1, docx_textacy1)

	# Best fuzzy match for the technique among the individual tokens
	# (flattened across the per-sentence token groups).
	flat_tokens1 = []
	for sen_tokens in tokens1:
		flat_tokens1.extend(sen_tokens)
	best_token1 = process.extractOne(technique1, flat_tokens1)
	print(best_token1)
	print('=====================')
	# Best fuzzy match among the merged entity / noun-chunk spans.
	# NOTE(review): the choices here are spaCy Span objects rather than plain
	# strings -- confirm fuzzywuzzy's default processor handles them as intended.
	best_span1 = process.extractOne(technique1, list(spans1))
	print(best_span1)

	'''
	Setuid and Setgid:

	('setgid', 90)
	=====================
	(the setsuid or setgid bits, 86)
	'''