Created
November 18, 2020 14:03
-
-
Save Wapiti08/91cfbb1be37abe9d28484ea6b16d96c9 to your computer and use it in GitHub Desktop.
fuzz_match_spans_tokens
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Compare the best fuzzy-match score of a technique name against
(a) the individual tokens and (b) the merged entity / noun-chunk spans
of the technique's description text.
'''
from itertools import chain

import spacy
import textacy
from fuzzywuzzy import process
from textacy.spacier.doc_extensions import to_tokenized_text, to_tagged_text
from textacy.spacier.utils import merge_spans
# Description text and name of the first technique under test.
sentence = "Adversaries may circumvent mechanisms designed to control elevate privileges to gain higher-level permissions. Most modern systems contain native elevation control mechanisms that are intended to limit privileges that a user can perform on a machine. Authorization has to be granted to specific users in order to perform tasks that can be considered of higher risk. An adversary can perform several methods to take advantage of built-in control mechanisms in order to escalate privileges on a system. "
technique = "Abuse Elevation Control Mechanism"
# Description text and name of the second technique under test.
# NOTE(review): the text is kept verbatim (including "setsuid" and the stray
# ". ."), because the recorded match results below depend on it.
sentence1 = "An adversary may perform shell escapes or exploit vulnerabilities in an application with the setsuid or setgid bits to get code running in a different user’s context. On Linux or macOS, when the setuid or setgid bits are set for an application, the application will run with the privileges of the owning user or group respectively. . Normally an application is run in the current user’s context, regardless of which user or group owns the application. However, there are instances where programs need to be executed in an elevated context to function properly, but the user running them doesn’t need the elevated privileges. "
technique1 = "Setuid and Setgid "
# Load the English pipeline once; the sentence1 test further down reuses it.
spacy_lang = textacy.load_spacy_lang("en_core_web_sm")

# ========== Test for sentence ============
docx_textacy = spacy_lang(sentence)
# Tokenize before the merge below, so the token candidates are taken from
# the unmerged document (one nested list of token strings per sentence).
tokens = to_tokenized_text(docx_textacy)
# Merge entities and noun chunks into one token each. filter_spans removes
# duplicate / overlapping spans first so the merge gets non-conflicting ranges.
spans = spacy.util.filter_spans(
    list(docx_textacy.ents) + list(docx_textacy.noun_chunks)
)
merge_spans(spans, docx_textacy)
# Best match among the individual tokens of all sentences.
print(process.extractOne(technique, list(chain.from_iterable(tokens))))
print('=====================')
# Best match among the spans; the span objects themselves are the choices,
# which is why the recorded result below is printed without quotes.
print(process.extractOne(technique, spans))
'''
Abuse Elevation Control Mechanism:
('control', 90)
=====================
(native elevation control mechanisms, 88)
'''
# ========== Test for sentence1 ===========
docx_textacy1 = spacy_lang(sentence1)
# Tokenize before the merge below, so the token candidates are taken from
# the unmerged document (one nested list of token strings per sentence).
tokens1 = to_tokenized_text(docx_textacy1)
# Merge entities and noun chunks into one token each. filter_spans removes
# duplicate / overlapping spans first so the merge gets non-conflicting ranges.
spans1 = spacy.util.filter_spans(
    list(docx_textacy1.ents) + list(docx_textacy1.noun_chunks)
)
merge_spans(spans1, docx_textacy1)
# Best match among the individual tokens of all sentences.
print(process.extractOne(technique1, list(chain.from_iterable(tokens1))))
print('=====================')
# Best match among the spans; the span objects themselves are the choices,
# which is why the recorded result below is printed without quotes.
print(process.extractOne(technique1, spans1))
'''
Setuid and Setgid:
('setgid', 90)
=====================
(the setsuid or setgid bits, 86)
'''
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment