Skip to content

Instantly share code, notes, and snippets.

@freelancing-solutions
Last active March 24, 2023 19:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save freelancing-solutions/45540df6f26c31c7957a85090d33f189 to your computer and use it in GitHub Desktop.
Save freelancing-solutions/45540df6f26c31c7957a85090d33f189 to your computer and use it in GitHub Desktop.
using regex pattern matching to secure an API
import collections
import hashlib
import hmac
import ipaddress
import re

from CloudFlare import CloudFlare
from CloudFlare.exceptions import CloudFlareAPIError
from starlette.datastructures import URL
from starlette.requests import Request

from src.cache.cache import redis_cache, redis_cached_ttl
from src.config import config_instance
from src.make_request import send_request
# Cloudflare credentials, read from the project configuration when this module is imported.
# EMAIL is also sent as the 'X-Auth-Email' header when fetching Cloudflare's IP ranges.
EMAIL = config_instance().CLOUDFLARE_SETTINGS.EMAIL
TOKEN = config_instance().CLOUDFLARE_SETTINGS.TOKEN
# Fallback list of IPv4 CIDR ranges, used when the Cloudflare IP-range response
# carries no 'ipv4_cidrs' entry. Requests are only admitted when their source
# address falls inside one of the known ranges.
DEFAULT_IPV4 = [
    '173.245.48.0/20',
    '103.21.244.0/22',
    '103.22.200.0/22',
    '103.31.4.0/22',
    '141.101.64.0/18',
    '108.162.192.0/18',
    '190.93.240.0/20',
    '188.114.96.0/20',
    '197.234.240.0/22',
    '198.41.128.0/17',
    '162.158.0.0/15',
    '104.16.0.0/13',
    '104.24.0.0/14',
    '172.64.0.0/13',
    '131.0.72.0/22',
]
# Patterns for known publicly acceptable routes.
#
# Every pattern is a raw string: in a normal string literal "\b" is a backspace
# character (not a regex word boundary) and "\d" / "\." are invalid escape
# sequences, so non-raw patterns either never match or trigger syntax warnings.
route_regexes = {
    "home": r"^/$",
    "all_general_fundamentals": r"^/api/v1/fundamental/general$",
    "annual_or_quarterly_statements_by_stock_code": r"^/api/v1/fundamentals/financial-statements/by-term/(20[1-9][0-9]|203[0-3])-(0[1-9]|1[0-2])-([0-2][0-9]|3[01])\.(20[1-9][0-9]|203[0-3])-(0[1-9]|1[0-2])-([0-2][0-9]|3[01])/[a-zA-Z0-9_-]{1,16}/\b(?:annual|quarterly)\b(?<!/)$",
    "company_financial_statements_by_year": r"^/api/v1/fundamentals/financial-statements/exchange-year/[a-zA-Z0-9]{16}/\d{4}$",
    "company_fundamental_data_complete": r"^/api/v1/fundamental/company/[a-zA-Z0-9]{128}(?<!/)$",
    "complete_stock_list": r"^/api/v1/stocks$",
    "create_exchange": r"^/api/v1/exchange$",
    "create_stocks_bulk": r"^/api/v1/stocks$",
    "fetch_exchange_list": r"^/api/v1/exchanges$",
    "fetch_listed_companies": r"^/api/v1/exchange/listed-companies/[a-zA-Z0-9]{128}(?<!/)$",
    "fetch_listed_stocks": r"^/api/v1/exchange/listed-stocks/[a-zA-Z0-9]{1,16}(?<!/)$",
    "fetch_listed_stocks_by_exchange_code": r"^/api/v1/stocks/exchange/code/[a-zA-Z0-9]{1,16}(?<!/)$",
    "fetch_listed_stocks_by_exchange_id": r"^/api/v1/stocks/exchange/id/[a-zA-Z0-9]{16}(?<!/)$",
    "fetch_stocks_listed_by_currency": r"^/api/v1/stocks/currency/[a-zA-Z0-9]{1,16}(?<!/)$",
    "fetch_stocks_listed_in_country": r"^/api/v1/stocks/country/[a-zA-Z]{1,4}(?<!/)$",
    # NOTE(review): the key says "annual" but the path says "quarterly-balance-sheet";
    # looks like a copy-paste slip - confirm against the real route before changing.
    "get_annual_balance_sheet": r"^/api/v1/fundamentals/quarterly-balance-sheet/(20[1-2][0-9]|203[0-3])-(0[1-9]|1[0-2])-\d{2}/[a-zA-Z0-9]{1,16}(?<!/)$",
    "get_quarterly_balance_sheet": r"^/api/v1/fundamentals/quarterly-balance-sheet/[2-9]\d{3}-(0[1-9]|1[0-2])-\d{2}/[a-zA-Z0-9]{1,16}(?<!/)$",
    "get_all_technical_indicators_in_an_exchange": r"^/api/v1/fundamentals/tech-indicators-by-exchange/exchange-code/[a-zA-Z0-9]{1,16}/(20[1-9][0-9]|203[0-3])(?<!/)$",
    "get_bulk_eod_from_exchange_for_date_range": r"^/api/v1/eod/(20[1-9][0-9]|203[0-3])-(0[1-9]|1[0-2])-([0-2][0-9]|3[01])\.(20[1-9][0-9]|203[0-3])-(0[1-9]|1[0-2])-([0-2][0-9]|3[01])/(?=[-_\w]{1,16}$)[-_\w]{1,16}$",
    "get_companies_analyst_ranks_by_exchange_an_year": r"^/api/v1/fundamentals/exchange-analyst-rankings/exchange-code/[a-zA-Z0-9_-]{1,16}/\d{4}$",
    "get_company_financial_statements_api": r"^/api/v1/fundamentals/financial-statements/company-statement/[a-zA-Z0-9-_]{1,16}/\d{4}$",
    "get_company_insider_transaction": r"^/api/v1/fundamentals/company-insider-transactions/stock-code/[a-zA-Z0-9-_]{1,16}/\d{4}$",
    "get_company_technical_indicators_for_a_year_given_stock_code": r"^/api/v1/fundamentals/tech-indicators-by-company/stock-code/[a-zA-Z0-9-_]{1,16}/\d{4}$",
    "get_company_valuation_data_for_a_year": r"^/api/v1/fundamentals/company-valuations/stock-code/[a-zA-Z0-9]{1,16}/\d{4}$",
    "get_contract": r"^/api/v1/stocks/contract/[a-zA-Z0-9]{16}(?<!/)$",
    "get_eod_data_multi_stock": r"^/api/v1/eod/\d{4}-\d{2}-\d{2}/[a-zA-Z0-9]{16}(?<!/)$",
    "get_eod_data_single_stock": r"^/api/v1/eod/\d{4}-\d{2}-\d{2}/[a-zA-Z0-9]{16}\.[a-zA-Z0-9]{16}(?<!/)$",
    "get_eod_from_to_date_for_stock": r"^/api/v1/eod/\d{4}-\d{2}-\d{2}\.\d{4}-\d{2}-\d{2}/[a-zA-Z0-9]{16}(?<!/)$",
    "get_exchange_by_code": r"^/api/v1/exchange/code/[a-zA-Z0-9]{2,16}(?<!/)$",
    "get_exchange_by_id": r"^/api/v1/exchange/id/[a-zA-Z0-9]{16}(?<!/)$",
    "get_exchange_with_tickers_by_code": r"^/api/v1/exchange/exchange-with-tickers/code/[a-zA-Z0-9]{2,16}(?<!/)$",
    "get_insider_transactions_from_exchange": r"^/api/v1/fundamentals/ex-technical-indicators/exchange-code/[a-zA-Z0-9]{2,16}/\d{4}(?<!/)$",
    "get_news": r"^/api/v1/news/article/[a-zA-Z0-9]{1,64}(?<!/)$",
    "get_news_articles_bounded": r"^/api/v1/news/articles-bounded/(?:(?:[1-9]|[1-8][0-9]|9[0-9])(?<!0))$",
    "get_news_articles_by_date": r"^/api/v1/news/articles-by-date/\d{4}-\d{2}-\d{2}(?<!/)$",
    "get_news_articles_by_publisher": r"^/api/v1/news/articles-by-publisher/[a-zA-Z0-9_-]{2,128}(?<!/)$",
    "get_news_articles_by_ticker": r"^/api/v1/news/articles-by-ticker/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "get_stock_by_code": r"^/api/v1/stock/code/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "get_stock_option": r"^/api/v1/stocks/options/stock/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "get_update_delete_by_fundamental_id_company_details": r"^/api/v1/fundamentals/company-details/id/[a-zA-Z0-9_-]{16}(?<!/)$",
    "get_update_delete_by_id_highlights": r"^/api/v1/fundamentals/highlights/id/[a-zA-Z0-9_-]{16}(?<!/)$",
    "get_update_delete_by_id_postal_address": r"^/api/v1/fundamentals/company-address/id/[a-zA-Z0-9_-]{16}(?<!/)$",
    "get_update_delete_by_stock_code_company_details": r"^/api/v1/fundamentals/company-details/stock/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "get_update_delete_by_stock_highlights": r"^/api/v1/fundamentals/highlights/stock/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "get_update_delete_by_stock_postal_address": r"^/api/v1/fundamentals/company-address/stock/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "get_valuations_for_companies_listed_in_exchange": r"^/api/v1/fundamentals/exchange-valuations/exchange-code/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "income_statement_by_filing_date_and_stock_code": r"^/api/v1/fundamentals/financial-statements/filing-date-ticker/\d{2}-\d{2}-\d{4}/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "income_statement_by_statement_id": r"^/api/v1/fundamentals/financials/income-statements/[a-zA-Z0-9_-]{16}(?<!/)$",
    # The separator between the two dates is a literal dot, escaped like the
    # other date-range routes; an unescaped "." would accept any character.
    "income_statements_by_ticker_and_date_range": r"^/api/v1/fundamentals/financial-statements/ticker-date-range/\d{2}-\d{2}-\d{4}\.\d{2}-\d{2}-\d{4}/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "most_trending_sentiment_by_stock": r"^/api/v1/sentiment/trending/stock/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "stock_trend_setters": r"^/api/v1/sentiment/trend-setters/stock/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "stock_tweet_sentiments": r"^/api/v1/sentiment/tweet/stock/[a-zA-Z0-9_-]{1,16}(?<!/)$",
    "open_api": r"^/open-api$",
    "redoc": r"^/redoc$",
    "swagger": r"^/swagger$",
}
# Dictionary of regex patterns that indicate a malicious request.
#
# All patterns are raw strings. Written as normal string literals, every "\b"
# became a backspace character instead of a regex word boundary, which silently
# disabled most of these checks.
malicious_patterns = {
    # buffer overflow attack
    "buffer_overflow": r"^\?unix:A{1000,}",
    # SQL injection attack
    "SQL_injection": r"'?([\w\s]+)'?\s*OR\s*'?([\w\s]+)'?\s*=\s*'?([\w\s]+)'?",
    # cross-site scripting (XSS) attack
    "XSS": r"<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>",
    # path traversal attack
    "path_traversal": r"\.\.[\\/]?|\.[\\/]{2,}",
    # disabled: LDAP injection attack
    # "LDAP_injection": "[()\\\/*\x00-\x1f\x80-\xff]",
    # command injection attack
    "command_injection": r";\s*(?:sh|bash|cmd|powershell)\b",
    # file inclusion attack
    "file_inclusion": r"(?:file|php|zip|glob|data)\s*:",
    # remote code execution attack
    "RCE_attack": r"^.*\b(?:eval|assert|exec|system|passthru|popen|proc_open)\b.*$",
    # cross-site request forgery attack
    "CSRF_attack": r"^.*(GET|POST)\s+.*\b(?:referer|origin)\b\s*:\s*(?!https?://(?:localhost|127\.0\.0\.1)).*$",
    # server-side request forgery attack
    "SSRF_attack": r"^.*\b(?:curl|wget|file_get_contents|fsockopen|stream_socket_client|socket_create)\b.*$",
    "CSWSH_attack": r"^.*\b(?:Sec-WebSocket-Key|Sec-WebSocket-Accept)\b.*$",
    "BRUTEFORCE_attack": r"^.*\b(?:admin|root|test|user|guest)\b.*$",
    "Credential_Stuffing": r"(?:\badmin\b|\broot\b|\bpassword\b|\b123456\b|\bqwerty\b|\b123456789\b|\b12345678\b|\b1234567890\b|\b12345\b|\b1234567\b|\b12345678910\b|\b123123\b|\b1234\b|\b111111\b|\babc123\b|\bmonkey\b|\bdragon\b|\bletmein\b|\bsunshine\b|\bprincess\b|\b123456789\b|\bfootball\b|\bcharlie\b|\bshadow\b|\bmichael\b|\bjennifer\b|\bcomputer\b|\bsecurity\b|\btrustno1\b)",
    # disabled: remote file inclusion
    # "Remote_File_Inclusion": "^.*(include|require)(_once)?\s*\(?\s*[\]\s*(https?:)?//",
    # Split across adjacent raw literals because the pattern contains both quote styles.
    "Clickjacking": (
        r'<iframe\s*src="[^"]+"'
        r"|<iframe\s*src='[^']+'"
        r"|\bX-Frame-Options\b\s*:\s*\b(DENY|SAMEORIGIN)\b"
    ),
    "XML_External_Entity_Injection": r"<!ENTITY[^>]*>",
    "Server_Side_Template_Injection": r"\{\{\s*(?:config|app|request|session|env|get|post|server|cookie|_|\|\|)\..+?\}\}",
    "Business_Logic_Attacks": r"^\b(?:price|discount|quantity|shipping|coupon|tax|refund|voucher|payment)\b",
    "Javascript_injection": r"(?:<\sscript\s>\s*|\blocation\b\s*=|\bwindow\b\s*.\slocation\s=|\bdocument\b\s*.\slocation\s=)",
    "HTML_injection": r"<\siframe\s|<\simg\s|<\sobject\s|<\sembed\s",
    # disabled: HTTP parameter pollution
    # "HTTP_PARAMETER_POLLUTION": '(?<=^|&)[\w-]+=[^&]*&[\w-]+=',
    "DOM_BASED_XSS": r"(?:\blocation\b\s*.\s*(?:hash|search|pathname)|document\s*.\s*(?:location|referrer).hash)",
}
class CloudFlareFirewall:
"""
TODO add more functionality to further enhance the security of our gateway
"""
def __init__(self):
self._max_payload_size: int = 64
try:
self.cloud_flare = CloudFlare(email=EMAIL, token=TOKEN)
# self.cloud_flare.api_from_openapi(url="https://www.eod-stock-api.site/open-api")
except CloudFlareAPIError:
pass
self.ip_ranges = []
self.bad_addresses = set()
self.compiled_patterns = [re.compile(_regex) for _regex in route_regexes.values()]
self.compiled_bad_patterns = [re.compile(pattern) for pattern in malicious_patterns.values()]
@staticmethod
async def get_ip_ranges() -> tuple[list[str], list[str]]:
"""
obtains a list of ip addresses from cloudflare edge servers
:return:
"""
_uri = 'https://api.cloudflare.com/client/v4/ips'
_headers = {'Accept': 'application/json', 'X-Auth-Email': EMAIL}
try:
response = await send_request(api_url=_uri, headers=_headers)
ipv4_cidrs = response.get('result', {}).get('ipv4_cidrs', DEFAULT_IPV4)
ipv6_cidrs = response.get('result', {}).get('ipv6_cidrs', [])
return ipv4_cidrs, ipv6_cidrs
except CloudFlareAPIError as e:
return [], []
async def path_matches_known_route(self, path: str):
"""helps to filter out malicious paths based on regex matching"""
# NOTE: that at this stage if this request is not a get then its invalid
return any(pattern.match(path) for pattern in self.compiled_patterns)
async def is_request_malicious(self, headers: dict[str, str], url: Request.url, body: str | bytes):
# Check request for malicious patterns
if 'Content-Length' in headers and int(headers['Content-Length']) > self._max_payload_size:
# Request payload is too large,
self.bad_addresses.add(str(url))
return True
if body:
# Set default regex pattern for string-like request bodies
payload_regex = "^[A-Za-z0-9+/]{1024,}={0,2}$"
if re.match(payload_regex, body):
print(f"Request Matched : {payload_regex}")
self.bad_addresses.add(str(url))
return True
path = str(url.path)
return any((pattern.match(path) for pattern in self.compiled_bad_patterns))
# @redis_cached_ttl(ttl=60 * 30)
async def check_ip_range(self, ip):
"""
This IP Range check only prevents direct server access from an Actual IP Address thereby
bypassing some of the security measures.
checks if an ip address falls within range of those found in cloudflare edge servers
:param ip:
:return:
"""
# TODO Debug this contains problems - some of the cloudflare addresses are being blocked
if ip in self.bad_addresses:
print("found in bad addresses")
return False
for ip_range in self.ip_ranges:
if ipaddress.ip_address(ip) in ipaddress.ip_network(ip_range):
return True
print("did not find a valid ip")
self.bad_addresses.add(ip)
return False
async def save_bad_addresses_to_redis(self) -> int:
"""
take a list of bad addresses and save to redis
:return:
"""
# This will store the list_of_bad_addresses for 3 hours
if self.bad_addresses:
THIRTY_DAYS = 60 * 60 * 24 * 30
await redis_cache.set(key="list_of_bad_addresses", value=list(self.bad_addresses), ttl=THIRTY_DAYS)
return len(self.bad_addresses)
async def restore_bad_addresses_from_redis(self):
bad_addresses = await redis_cache.get(key="list_of_bad_addresses") or []
for bad_address in bad_addresses:
self.bad_addresses.add(bad_address)
@staticmethod
async def confirm_signature(signature, request, secret):
url = request.url
method = request.method.upper()
headers = request.headers
expected_signature = hashlib.sha256(f"{method}{url}{headers}{secret}".encode('UTF-8'))
return signature == expected_signature
@staticmethod
async def sha256(message):
data = message.encode('utf-8')
hash_bytes = hashlib.sha256(data).digest()
hash_hex = hash_bytes.hex()
return hash_hex
@staticmethod
async def create_signature(response, url, secret):
method = response.method.upper()
headers = response.headers
message = f"{method}{url}{headers}{secret}"
signature = hashlib.sha256(message)
return signature
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment