Skip to content

Instantly share code, notes, and snippets.

@andrew-tc
Created August 29, 2017 12:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrew-tc/479d892efd4b3fd217e4e959a2ccc816 to your computer and use it in GitHub Desktop.
Save andrew-tc/479d892efd4b3fd217e4e959a2ccc816 to your computer and use it in GitHub Desktop.
SpoilerOverflow
import re
def sanitize_body(text):
    """Return *text* as plain text with tags removed and whitespace normalised.

    Steps, in order:
      1. Turn backslash-escaped apostrophes (``\\'``) into plain ``'``.
      2. Remove anything that looks like a markup tag (``<...>``).
      3. Collapse every run of whitespace into a single space and trim
         both ends.

    Args:
        text: The raw string, possibly containing HTML-like tags.

    Returns:
        The cleaned string; empty if nothing but tags/whitespace was given.
    """
    body = text.replace("\\'", "'")
    body = re.sub(r"<[^<]+?>", "", body)
    # Normalise whitespace only AFTER the tags are gone: removing a tag that
    # had spaces on both sides would otherwise leave a double space behind.
    return " ".join(body.split())
def extract_content(text):
    """Split *text* into its normal part and its spoiler blocks.

    A spoiler block is everything from ``<blockquote class="spoiler">`` up to
    the next ``</blockquote>``. Each block and the remaining text are passed
    through ``sanitize_body`` before being returned.

    Args:
        text: The raw markup string.

    Returns:
        A dict with two keys:
          ``"normal"``  - the sanitised text outside all spoiler blocks.
          ``"spoiler"`` - a list with one sanitised string per spoiler block,
                          in order of appearance.
    """
    # Non-greedy ``.*?`` so that each spoiler block is matched on its own.
    # A greedy match would run from the first opening tag to the LAST closing
    # tag and swallow any normal text between two spoilers. ``*`` (rather
    # than ``+``) lets an empty spoiler block match as well.
    pattern = r'<blockquote class="spoiler">.*?</blockquote>'

    spoiler = [
        sanitize_body(block)
        for block in re.findall(pattern, text, flags=re.DOTALL)
    ]

    # Replace each removed block with a space so the words on either side of
    # it are not glued together; sanitize_body collapses surplus whitespace.
    body = re.sub(pattern, " ", text, flags=re.DOTALL)

    return {"normal": sanitize_body(body), "spoiler": spoiler}
def count_word(text):
    """Return how many whitespace-separated tokens *text* contains."""
    tokens = text.split()
    return len(tokens)
def count_character(text):
    """Return the number of characters in *text*, not counting whitespace.

    The text is split on whitespace and the lengths of the resulting tokens
    are added up.
    """
    return sum(len(token) for token in text.split())
def calculate_percentage(value, total):
    """Return *value* expressed as a percentage of *total* (as a float).

    Raises:
        ZeroDivisionError: if *total* is zero.
    """
    # Multiply by the float 100.0 first so the division is a float division.
    scaled = 100.0 * value
    return scaled / total
def calculate_spoiler_percentage(text):
    """Return how much of *text* is inside spoiler blocks, as percentages.

    The text is split with ``extract_content``; words and non-whitespace
    characters are then counted separately for the normal part and for all
    spoiler blocks together.

    Args:
        text: The raw markup string.

    Returns:
        A dict with two float values:
          ``"word"``       - spoiler words as a percentage of all words.
          ``"characters"`` - spoiler characters as a percentage of all
                             counted characters.
        Both are ``0.0`` when the text has no countable content at all.
    """
    content = extract_content(text)
    normal = content["normal"]
    spoilers = content["spoiler"]

    normal_words = count_word(normal)
    normal_chars = count_character(normal)
    spoiler_words = sum(count_word(item) for item in spoilers)
    spoiler_chars = sum(count_character(item) for item in spoilers)

    def share(part, whole):
        # Empty or tag-only input yields a total of zero; report 0% instead
        # of letting the division raise ZeroDivisionError.
        if not whole:
            return 0.0
        return calculate_percentage(part, whole)

    return {
        "word": share(spoiler_words, normal_words + spoiler_words),
        "characters": share(spoiler_chars, normal_chars + spoiler_chars),
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment