-
-
Save SebiAi/34d7eb92a26b8034c936eba8637860c7 to your computer and use it in GitHub Desktop.
Code extractor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Usage: rm -f a.out && chmod +x e.py && ./e.py i.txt > m.cpp && clang++ -Wall -Wextra -pedantic-errors -fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all m.cpp && ./a.out && echo ""
# Import regex module | |
import re | |
# Import html module | |
import html | |
# Import sys module | |
import sys | |
# Import os module | |
import os | |
# Define function to extract the string | |
def extract(string):
    """Return the plain text contained in a saved CodeMirror HTML snippet.

    Each opening ``<pre class=" CodeMirror-line " role="presentation">`` tag
    is turned into a newline, every text node that sits between a ``>`` and
    a following ``<`` is collected (text directly followed by ``</div`` is
    skipped), HTML entities are decoded and zero-width spaces are removed.

    Args:
        string: The raw HTML markup.

    Returns:
        The extracted text with entities unescaped.
    """
    # One source line per CodeMirror <pre>: replace the opening tag by '\n'
    string = string.replace(r'<pre class=" CodeMirror-line " role="presentation">', '\n')
    # Collect every text node; the negative lookahead rejects text that is
    # immediately followed by a closing </div
    fragments = re.findall(r'(?<=>)([^<]+)(?=<)(?!<\/div)', string)
    # Decode entities BEFORE stripping zero-width spaces, so that encoded
    # forms such as '&#8203;' are removed as well as the literal character
    text = html.unescape(''.join(fragments))
    return text.replace('\u200b', '')
# Require exactly one argument: the path of the saved HTML file
if len(sys.argv) != 2:
    # stdout is normally redirected into the generated source file, so the
    # usage text goes to stderr; sys.exit(<str>) also yields exit status 1,
    # which stops a shell `&&` chain instead of letting it continue
    sys.exit("Usage: " + os.path.basename(__file__) + " <input file>")
inputFile = sys.argv[1]
# Convert relative paths to absolute ones (isabs() is also safe for an
# empty argument, unlike indexing the first character)
if not os.path.isabs(inputFile):
    inputFile = os.path.abspath(inputFile)
# The argument must name an existing regular file
if not os.path.isfile(inputFile):
    sys.exit("Error: Not a file")
# Read the markup as UTF-8 rather than relying on the locale's default
# encoding; the extractor looks for the non-ASCII character U+200B
with open(inputFile, 'r', encoding='utf-8') as f:
    rawHtml = f.read()
# Print the extracted text on stdout
print(extract(rawHtml))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment