Skip to content

Instantly share code, notes, and snippets.

@isoteemu
Last active January 12, 2024 13:39
Show Gist options
  • Save isoteemu/4930c0cd3b88b448cb9e333473b4c784 to your computer and use it in GitHub Desktop.
Save isoteemu/4930c0cd3b88b448cb9e333473b4c784 to your computer and use it in GitHub Desktop.
Python library for generating data for flooding scammers
"""
libscammer.py - A library for flooding scammers with fake data
This library is used by the scripts in this repository to generate fake data and send it to scammers.
:func:`init()` returns a :class:`Faker` and :class:`requests.Session` instance that can be used to generate fake data and send it to the scammer.
>>> from libscammer import init
>>> faker, session = init(["en_US", "en_GB"])
>>> session.post("https://example.com", data={"name": faker.name()})
To use it as a supervisor, run this file with the path to the script as an argument:
$ python libscammer.py scripts/usps-using-libscammer.py
Supervisor will run the script, and if it fails, it will increase the delay between requests. If the script fails too many times, it will exit.
Data sets used by this library; place them in the same directory as this file:
- https://github.com/binlist/data/raw/master/ranges.csv for credit card numbers.
- https://gist.github.com/phillipj/4944029 or any other text file for polluting data fields. Should be named "alice.txt"
- https://dev.maxmind.com/geoip/geolite2-free-geolocation-data CSV formatted country files for IP addresses.
"""
import ipaddress
from pathlib import Path
import time
from typing import Dict, List, Optional, Set, Tuple
from urllib.parse import urlparse
from faker.providers.credit_card import CardType
import pandas as pd
import requests
from faker import Faker
from faker.config import AVAILABLE_LOCALES, DEFAULT_LOCALE
import random
import builtins
from rich.console import Console
# Locales that are removed from the pool of usable faker locales.
BROKEN_LOCALES = {"es_AR", "fr_QC"}

# Browser user agents with their relative share ("pct"); the share is used as
# the selection weight. Each string must be a complete user agent: a value
# that ends in a truncated version token (for example "Safari/537.3" or
# "Firefox/121.") is an easy fingerprint for the receiving side.
USER_AGENTS = [
    {
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
        "pct": 35.9,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "pct": 15.86,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
        "pct": 13.22,
    },
    {
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15",
        "pct": 12.78,
    },
    {
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
        "pct": 10.79,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
        "pct": 2.64,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
        "pct": 2.64,
    },
    {
        "ua": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "pct": 1.76,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
        "pct": 0.88,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0",
        "pct": 0.88,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0",
        "pct": 0.88,
    },
    {
        "ua": "Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "pct": 0.88,
    },
    {
        "ua": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "pct": 0.88,
    },
]
console = Console()

# Pad the pool with ten user agents generated by Faker, each with weight 10.
_ua_source = Faker()
USER_AGENTS.extend(
    {"ua": _ua_source.user_agent(), "pct": 10} for _ in range(10)
)
del _ua_source
def libscammer_get_ua() -> str:
    """
    Return the user agent string a session should present.

    A script (or the supervisor) can pin a specific user agent by installing
    a callable on ``builtins`` under this function's name, for example:

        import builtins
        builtins.libscammer_get_ua = lambda: "Mozilla/5.0 (...)"

    Without a truthy override, one entry of ``USER_AGENTS`` is drawn at
    random, using each entry's ``pct`` value as its weight.
    """
    override = getattr(builtins, "libscammer_get_ua", None)
    if override:
        return override()

    agents = [entry["ua"] for entry in USER_AGENTS]
    weights = [entry["pct"] for entry in USER_AGENTS]
    return random.choices(agents, weights=weights)[0]
class ScammerSession(requests.Session):
    """
    A :class:`requests.Session` that presents randomised client headers.

    On construction the session picks a user agent and a fake ``Client-IP``
    and, when the random hop count is non-zero, an ``X-Forwarded-For`` chain.
    :meth:`post` logs every request, occasionally replaces one form field
    with junk text, and raises when the response ends up on another domain or
    carries an error status.
    """

    def __init__(self):
        super().__init__()
        try:
            user_agent = libscammer_get_ua()
        except NameError:
            # The helper cannot be resolved in this namespace; draw from the
            # weighted pool directly.
            user_agent = random.choices(
                [b["ua"] for b in USER_AGENTS], weights=[b["pct"] for b in USER_AGENTS]
            )[0]

        x_forwarded_for = self._x_forwarded_for()
        # NOTE: the plain dict assigned here replaces whatever headers the
        # base class set up in its own __init__.
        self.headers = {
            "User-Agent": user_agent,
            "Client-IP": Faker().ipv4(),
        }
        if x_forwarded_for:
            self.headers.update({
                "X-Forwarded-For": ','.join(x_forwarded_for),
                "Client-IP": random.choice(x_forwarded_for),
            })

    def _x_forwarded_for(self):
        """Return a list of zero to five random IPv4/IPv6 addresses to use as proxy hops."""
        # From leaked source code, some use x-forwarded-for to get the IP
        hops_num = random.randint(0, 5)
        f = Faker()
        return [random.choice([f.ipv4(), f.ipv6()]) for _ in range(hops_num)]

    def post(self, url, *args, **kwargs):
        """
        POST to ``url`` and return the response.

        With a probability of 12 % one field of the ``data`` keyword argument
        is overwritten with junk text. A default timeout of 20 seconds is
        applied unless the caller passes ``timeout``.

        :raises Exception: if the final response URL is on a different domain
            than ``url`` (i.e. the request was redirected elsewhere).
        :raises requests.HTTPError: if the response status is an error.
        """
        console.log(f"POST {url}")
        console.log(kwargs.get("data", {}))
        if random.random() < 0.12:
            kwargs["data"] = self._pollute_data(kwargs.get("data", {}))
        kwargs.setdefault("timeout", 20)

        r = super().post(url, *args, **kwargs)

        # Compare the domain we posted to with the domain that answered.
        origin_domain = urlparse(url).netloc
        redirect_domain = urlparse(r.url).netloc
        if origin_domain != redirect_domain:
            console.log(r.request.headers)
            raise Exception(f"Redirected to {r.url} from {url}")

        console.log(f" '- {r.url} {r.status_code}")
        if not r.ok:
            print(r.text)
        r.raise_for_status()
        return r

    def _pollute_data(self, data=None):
        """
        Overwrite one randomly chosen field of ``data`` with junk text.

        The junk is a random slice of ``alice.txt`` (looked up next to this
        file) or the string ``"[object Object]"`` when that file is missing
        or empty. The mapping is modified in place and also returned. Empty
        or non-dict payloads are returned untouched.
        """
        if not data or not isinstance(data, dict):
            return data

        data_file = Path(__file__).parent / "alice.txt"
        # Default pollution; the javascript error
        pollution = "[object Object]"
        if data_file.exists():
            with data_file.open("r", encoding="utf-8", errors="replace") as f:
                file_content = f.read()
            file_length = len(file_content)
            if file_length > 0:
                # randrange keeps the start strictly inside the text, so at
                # least one character is always left for the slice length.
                random_start = random.randrange(file_length)
                random_amount = random.randint(1, file_length - random_start)
                pollution = file_content[random_start : random_start + random_amount]

        data_key = random.choice(list(data.keys()))
        data[data_key] = pollution
        console.log(" '- Polluting data field", repr(data_key))
        return data
class ScammerFaker(Faker):
    """
    :class:`Faker` subclass that adjusts a few generators used by the scripts.
    """

    def profile(self, *args, **kwargs):
        """
        Return a faker profile extended with a ``password`` entry.

        The password is chosen at random from a generated password, a short
        lower-cased text, a single word and, when the profile contains both
        ``username`` and ``birthdate``, the username followed by the whole
        birth year, its last two digits or nothing.
        """
        profile = super().__getattr__("profile")(*args, **kwargs)
        # Generate with this instance, not a module-level faker, so the
        # password follows the same locale as the rest of the profile.
        candidates = [
            self.password(),
            self.text(max_nb_chars=self.random_int(10, 24)).strip(".").lower(),
            self.word(),
        ]
        if "username" in profile and "birthdate" in profile:
            year = str(profile["birthdate"].year)
            candidates.append(profile["username"] + year[random.choice([0, 2, 4]) :])
        profile['password'] = random.choice(candidates)
        return profile

    def phone_number(self, *args, **kwargs) -> str:
        """
        Return a phone number, falling back to ``msisdn`` and finally to ten
        random digits when the preceding generator is not usable.
        """
        try:
            return super().__getattr__("phone_number")(*args, **kwargs)
        except AttributeError:
            try:
                return super().__getattr__("msisdn")(*args, **kwargs)
            except ValueError:
                return str(otp(10))

    def password(self, *args, **kwargs) -> str:
        """Return a password; the length defaults to a random value from 6 to 16."""
        kwargs.setdefault("length", self.random_int(6, 16))
        return super().__getattr__("password")(*args, **kwargs)

    def credit_card_number(self, *args, **kwargs):
        """
        Return a card number from ``bank_credit_card`` when that works, and
        from the stock ``credit_card_number`` generator otherwise.
        """
        try:
            cc_info = self.__getattr__("bank_credit_card")(*args, **kwargs)
            return cc_info["number"]
        except Exception as e:
            # Best effort: log the problem and fall back to the stock generator.
            console.log("Failed generating credit card number with iin", e)
            console.print_exception()
        return super().__getattr__("credit_card_number")(*args, **kwargs)
from faker.providers import BaseProvider
from faker.providers.credit_card import Provider as CreditCardProvider
class IINCreditCardProvider(CreditCardProvider):
    """
    Credit card provider that takes card prefixes from a bank IIN data set.
    """

    # CSV file with the IIN ranges, expected next to this file.
    data_file_ranges = Path(__file__).parent / "ranges.csv"

    # Faker card definition -> value of the "scheme" column in the data set.
    card_to_scheme = {
        CreditCardProvider.credit_card_types['visa']: 'visa',
        CreditCardProvider.credit_card_types['mastercard']: 'mastercard',
        CreditCardProvider.credit_card_types['amex']: 'amex',
        CreditCardProvider.credit_card_types['discover']: 'discover',
        CreditCardProvider.credit_card_types['diners']: 'diners',
    }

    def _df(self):
        """
        Lazy load the bank data.

        :raises FileNotFoundError: if the data file has not been downloaded.
        """
        if not hasattr(self, "_bank_df"):
            data_file = Path(self.data_file_ranges)
            if not data_file.exists():
                raise FileNotFoundError(f"Could not find bank data file {data_file}. Download from https://github.com/binlist/data/raw/master/ranges.csv")
            self._bank_df = pd.read_csv(data_file)
        return self._bank_df

    def bank_credit_card(self, card_type: Optional[CardType] = None):
        """ Generate a random credit card number.

        Uses dataset to generate a random credit card number that will pass the iin check.

        :param card_type: restrict the choice to this card type; any row of
            the current country is used when omitted.
        :return: the selected data set row as a dict, with the generated
            number stored under ``"number"``.
        :raises ValueError: if the data set has no row for the current
            country (and card type).
        """
        country_code = self.generator.current_country_code().upper()
        df = self._df()
        if card_type is not None:
            card = self._credit_card_type(card_type)
            card_scheme = self.card_to_scheme[card]
            df = df[(df["scheme"] == card_scheme) & (df["country"] == country_code)]
        else:
            df = df[(df["country"] == country_code)]
        if df.empty:
            raise ValueError(f"No bank data for country {country_code!r} and card type {card_type!r}")
        df = df.fillna("")

        # select random row
        # TODO: Make deterministic
        bank = df.sample(n=1).iloc[0].to_dict()

        # The CSV columns can come back as floats (or "" after fillna), but
        # random.randint needs real integers.
        iin_start = int(bank["iin_start"])
        iin_end = int(bank["iin_end"] or iin_start)
        prefix = random.randint(iin_start, iin_end)

        if not card_type:
            # Select card type from card_to_scheme where value matches bank['scheme']
            card_scheme = next((k for k, v in self.card_to_scheme.items() if v == bank['scheme']), None)
            card = self._credit_card_type(card_scheme)

        bank['number'] = self._generate_number(str(prefix), card.length)
        return bank

    def random_credit_card(self):
        """Return one randomly chosen card definition from ``card_to_scheme``."""
        return self.random_element(tuple(self.card_to_scheme))
class GeoIP(BaseProvider):
    """
    Faker provider that produces IPv4 addresses for a given country.

    Addresses come from the GeoLite2 country CSV files when they are present
    next to this file, and from the built-in ``IPV4_RANGES`` table otherwise.
    """

    # GeoLite2-Country-Locations-en.csv file
    geoip_file_locations = Path(__file__).parent / "GeoLite2-Country-Locations-en.csv"
    # GeoLite2-Country-Blocks-IPv4.csv file
    geoip_file_blocks_ipv4 = Path(__file__).parent / "GeoLite2-Country-Blocks-IPv4.csv"

    # Some large IP ranges for each country, as (first, last) integer
    # addresses. This is used if the GeoIP database is not available.
    IPV4_RANGES: Dict[str, Tuple[int, int]] = {
        'AD': (783081472, 783089663),
        'AE': (1590165504, 1590689791),
        'AF': (2503344128, 2503376895),
        'AG': (637681664, 637689855),
        'AI': (1280094208, 1280096255),
        'AL': (1332346880, 1332412415),
        'AM': (776339456, 776470527),
        'AO': (1772617728, 1773142015),
        'AQ': (1738046976, 1738047487),
        'AR': (3128655872, 3129999359),
        'AS': (1138372608, 1138376703),
        'AT': (1299447808, 1299709951),
        'AU': (24641536, 27262975),
        'AW': (3132948480, 3132964863),
        'AX': (1388814336, 1388822527),
        'AZ': (96403456, 96468991),
        'BA': (531660800, 531693567),
        'BB': (1211324928, 1211333119),
        'BD': (1921122304, 1921187839),
        'BE': (1837503190, 1838153727),
        'BF': (1722941440, 1723138047),
        'BG': (1596588032, 1596719103),
        'BH': (263716864, 263847935),
        'BI': (2591408128, 2591424511),
        'BJ': (2315190272, 2315255807),
        'BL': (394782720, 394782975),
        'BM': (3289137152, 3289153535),
        'BN': (2661351424, 2661416959),
        'BO': (3006922752, 3007053823),
        'BQ': (2701152256, 2701156351),
        'BR': (3010291712, 3015562239),
        'BS': (406011904, 406028287),
        'BT': (1996644352, 1996652543),
        'BV': (3116465152, 3116465407),
        'BW': (2829516800, 2829582335),
        'BY': (2994208768, 2994733055),
        'BZ': (3200581632, 3200614399),
        'CA': (1674575872, 1677721599),
        'CC': (234969856, 234970111),
        'CD': (703791104, 703856639),
        'CF': (3290482688, 3290484735),
        'CG': (2691760128, 2691825663),
        'CH': (959512576, 960561151),
        'CI': (1720188928, 1720451071),
        'CK': (3393265664, 3393273855),
        'CL': (3211788288, 3212312575),
        'CM': (1727266816, 1727528959),
        'CN': (610271232, 618659839),
        'CO': (3045064704, 3047161855),
        'CR': (3384737792, 3385851903),
        'CU': (2563637248, 2563768319),
        'CV': (2774163456, 2774171647),
        'CW': (3193438208, 3193503743),
        'CX': (234969472, 234969855),
        'CY': (530120704, 530186239),
        'CZ': (1521491968, 1522008063),
        'DE': (897238056, 905969663),
        'DJ': (700309504, 700317695),
        'DK': (1462763520, 1463304191),
        'DM': (3470131200, 3470134271),
        'DO': (2561015808, 2561146879),
        'DZ': (694157312, 695205887),
        'EC': (3125018624, 3125280767),
        'EE': (1522401280, 1522532351),
        'EG': (2627731456, 2631033155),
        'EH': (2593522688, 2593522943),
        'ER': (3301466112, 3301470207),
        'ES': (1476395008, 1478492159),
        'ET': (3300767381, 3300917247),
        'FI': (2196111360, 2196439039),
        'FJ': (2423783424, 2423848959),
        'FK': (1347014656, 1347017471),
        'FM': (960937984, 960942079),
        'FO': (1481973760, 1481981951),
        'FR': (1455423488, 1459617791),
        'GA': (698220544, 698351615),
        'GB': (426470231, 436207615),
        'GD': (1249531904, 1249533951),
        'GE': (529661952, 529727487),
        'GF': (1383825920, 1383858175),
        'GG': (1481736192, 1481744383),
        'GH': (2594177024, 2595225599),
        'GI': (1433632768, 1433640959),
        'GL': (1481834496, 1481842687),
        'GM': (2696282112, 2696413183),
        'GN': (3314925568, 3314941951),
        'GP': (1568243712, 1568276479),
        'GQ': (1777065984, 1777070079),
        'GR': (1581309081, 1581776895),
        'GS': (2587963392, 2587963647),
        'GT': (3197392384, 3197470207),
        'GU': (2826633216, 2826698751),
        'GW': (2588490752, 2588491775),
        'GY': (3192913920, 3192946687),
        'HK': (1912340480, 1912602623),
        'HM': (2470577152, 2470577919),
        'HN': (3050438656, 3050504191),
        'HR': (1569193984, 1569718271),
        'HT': (2489745408, 2489778175),
        'HU': (1501822976, 1502085119),
        'ID': (666894336, 671088639),
        'IE': (1461714944, 1462763519),
        'IL': (2218899456, 2219769855),
        'IM': (1406795776, 1406803967),
        'IN': (1977400193, 1979711487),
        'IO': (3391909888, 3391910911),
        'IQ': (636223488, 636485631),
        'IR': (45088768, 46137343),
        'IS': (2194669568, 2194735103),
        'IT': (1325400064, 1329594367),
        'JE': (2784624640, 2784690175),
        'JM': (1209729024, 1209786367),
        'JO': (2954657792, 2954756095),
        'JP': (2231369728, 2248146943),
        'KE': (1764753408, 1765801983),
        'KG': (2662694912, 2662727679),
        'KH': (606437376, 606470143),
        'KI': (1744437248, 1744438271),
        'KM': (1725921280, 1725922303),
        'KN': (3431579648, 3431587839),
        'KP': (2939006976, 2939007999),
        'KR': (3551002624, 3556769791),
        'KW': (623191334, 623378431),
        'KY': (1256079360, 1256087551),
        'KZ': (38273024, 38797311),
        'LA': (1934901248, 1934917631),
        'LB': (2995191808, 2995257343),
        'LC': (1162871296, 1162879999),
        'LI': (1383399424, 1383407615),
        'LK': (1887830016, 1887961087),
        'LR': (693501952, 693510143),
        'LS': (2179465216, 2179497983),
        'LT': (1312369840, 1312817151),
        'LU': (2661679104, 2661875711),
        'LV': (778895360, 778960895),
        'LY': (704380928, 704643071),
        'MA': (1769996288, 1772093439),
        'MC': (1490108416, 1490124799),
        'MD': (1551007744, 1551106047),
        'ME': (533512192, 533528575),
        'MF': (1248913408, 1248915455),
        'MG': (1712324608, 1712717823),
        'MH': (1969707008, 1969709055),
        'MK': (1293680640, 1293811711),
        'ML': (2591457280, 2591473663),
        'MM': (628031488, 628064255),
        'MN': (2090745856, 2090762239),
        'MO': (1022754816, 1022820351),
        'MP': (1701044224, 1701052415),
        'MQ': (1552968704, 1552999935),
        'MR': (700203008, 700219391),
        'MS': (2734472192, 2734473215),
        'MT': (772472832, 772538367),
        'MU': (1718616064, 1719140351),
        'MV': (460488704, 460505087),
        'MW': (1715863552, 1715994623),
        'MX': (3179282432, 3184116735),
        'MY': (2944925696, 2945581055),
        'MZ': (3319398400, 3319529471),
        'NC': (1701117952, 1701134335),
        'NE': (696918016, 696926207),
        'NF': (3406950400, 3406951423),
        'NG': (1768947712, 1769996287),
        'NI': (3125542912, 3125673983),
        'NL': (2441196709, 2446983167),
        'NO': (2338936320, 2339962879),
        'NP': (620494848, 620625919),
        'NR': (3412254720, 3412262911),
        'NU': (832319488, 832319999),
        'NZ': (2050490368, 2051014655),
        'OM': (86245376, 86376447),
        'PA': (3125280768, 3125542911),
        'PE': (3202875392, 3203399679),
        'PF': (2066890752, 2066907135),
        'PG': (2096152576, 2096160767),
        'PH': (1892155392, 1892941823),
        'PK': (656408576, 658505727),
        'PL': (1392508928, 1393623705),
        'PM': (1176764416, 1176768511),
        'PN': (1102388736, 1102388799),
        'PR': (684130304, 684195839),
        'PS': (3164667904, 3164733439),
        'PT': (1441792000, 1442316287),
        'PW': (960950272, 960954367),
        'PY': (3044540416, 3045064703),
        'QA': (78249984, 78381055),
        'RE': (1713569792, 1713635327),
        'RO': (1450704896, 1451220991),
        'RS': (3000762368, 3001024511),
        'RU': (1595408384, 1595998207),
        'RW': (700055552, 700121087),
        'SA': (3157262336, 3157786623),
        'SB': (3389104128, 3389112319),
        'SC': (2598256640, 2598305791),
        'SD': (1719140352, 1719664639),
        'SE': (1373634560, 1374683135),
        'SG': (721420288, 727187455),
        'SH': (3255173120, 3255173631),
        'SI': (3166961664, 3167223807),
        'SJ': (758880000, 758880255),
        'SK': (1315045376, 1315176447),
        'SL': (1720647680, 1720680447),
        'SM': (1505370112, 1505378303),
        'SN': (693239808, 693370879),
        'SO': (2591277056, 2591293439),
        'SR': (3132325888, 3132358655),
        'SS': (1777061888, 1777063935),
        'ST': (961839104, 961843199),
        'SV': (3193307136, 3193438207),
        'SX': (3194355712, 3194363903),
        'SY': (83886080, 83951615),
        'SZ': (693428224, 693436415),
        'TC': (1107243008, 1107247103),
        'TD': (2588180480, 2588188671),
        'TF': (2591589816, 2591589831),
        'TG': (3299344384, 3299606527),
        'TH': (2875195392, 2875719679),
        'TJ': (1426685952, 1426702335),
        'TK': (459282432, 459284479),
        'TL': (3032326144, 3032330239),
        'TM': (1599430656, 1599438847),
        'TN': (3305111552, 3306393528),
        'TO': (960966656, 960970751),
        'TR': (1432346624, 1433353473),
        'TT': (3123445760, 3123576831),
        'TV': (960970752, 960974847),
        'TW': (2019557376, 2021654527),
        'TZ': (2627469312, 2627731455),
        'UA': (1564999680, 1565368447),
        'UG': (1716518912, 1717043199),
        'UM': (1074392944, 1074392951),
        'US': (469762048, 520093695),
        'UY': (2805465088, 2805989375),
        'UZ': (1412841472, 1412857855),
        'VA': (961533952, 961540095),
        'VC': (3485462528, 3485464575),
        'VE': (3200647168, 3201302527),
        'VG': (1112653824, 1112657919),
        'VI': (2464284672, 2464350207),
        'VN': (249561088, 251658239),
        'VU': (960974848, 960978943),
        'WF': (1964253184, 1964255231),
        'WS': (960958464, 960962559),
        'XK': (778266929, 778305535),
        'YE': (2250440704, 2250506239),
        'YT': (1552847360, 1552864255),
        'ZA': (3309305856, 3311589983),
        'ZM': (1720713216, 1721237503),
        'ZW': (699351040, 699368447)
    }

    def _df(self):
        """
        Load the GeoIP data files.

        Download from https://dev.maxmind.com/geoip/geoip2/geolite2/ and place
        the files at :attr:`geoip_file_locations` and
        :attr:`geoip_file_blocks_ipv4`. The merged frame is cached on the
        instance after the first successful load.

        :raises FileNotFoundError: if either CSV file is missing.
        """
        if not hasattr(self, "_geoip_df"):
            missing_files = []
            if not Path(self.geoip_file_locations).exists():
                missing_files.append(self.geoip_file_locations)
            if not Path(self.geoip_file_blocks_ipv4).exists():
                missing_files.append(self.geoip_file_blocks_ipv4)
            if len(missing_files) > 0:
                raise FileNotFoundError(f"Could not find GeoIP data files {missing_files}. Download from https://dev.maxmind.com/geoip/geoip2/geolite2/")
            locations_df = pd.read_csv(self.geoip_file_locations)
            ipv4_df = pd.read_csv(self.geoip_file_blocks_ipv4)
            self._geoip_df = locations_df.merge(ipv4_df, on="geoname_id")
        return self._geoip_df

    def country_ipv4(self, country_code: Optional[str] = None) -> str:
        """
        Generate a random IP address for a country.

        :param country_code: a country code or a ``language_COUNTRY`` /
            ``language-COUNTRY`` locale; the generator's current country is
            used when omitted.
        :return: dotted IPv4 address. A completely random address is
            returned when the country cannot be resolved.
        """
        # Every result is incremented by one before it is returned, so the
        # drawn integer must stay one below the IPv4 maximum.
        highest_base = 2**32 - 2

        if country_code is None:
            country_code = self.generator.current_country_code()
        # Convert locale to country code
        locale_and_country = country_code.upper().replace("-", "_").split("_")
        if len(locale_and_country) == 2:
            country_code = locale_and_country[1]
        iso_code = country_code.upper()

        try:
            df = self._df()
            df = df[df["country_iso_code"] == iso_code]
            df = df[df["network"].notnull()]
        except FileNotFoundError as e:
            # No database: use the built-in table, or a random address when
            # the table has no entry for this code.
            console.log("Could not find GeoIP data files.", e, "Using fallback IP ranges")
            fallback_range = self.IPV4_RANGES.get(iso_code)
            if fallback_range is None:
                console.log(f"Could not find IP for country {country_code}")
                ip = random.randint(0, highest_base)
            else:
                ip = self.random_int(*fallback_range)
            return str(ipaddress.IPv4Address(ip) + 1)

        # select random row
        try:
            network = df.sample(n=1)['network'].values[0]
            # generate random IP address in the network
            network_obj = ipaddress.IPv4Network(network)
            ip = ipaddress.IPv4Address(
                random.randint(
                    int(network_obj.network_address),
                    min(int(network_obj.broadcast_address), highest_base),
                )
            )
        except Exception as e:
            console.log(f"Could not find IP for country {country_code}", e)
            ip = ipaddress.IPv4Address(random.randint(0, highest_base))
        return str(ip + 1)
def otp(length=-1):
    """
    Return a string of random decimal digits.

    :param length: number of digits; any value below 1 selects a random
        length from 4 to 8.
    """
    digit_count = length if length >= 1 else random.randint(4, 8)
    digits = (str(random.randint(0, 9)) for _ in range(digit_count))
    return "".join(digits)
def init(locales: Set = set(AVAILABLE_LOCALES) - BROKEN_LOCALES) -> Tuple[ScammerFaker, ScammerSession]:
    """
    Create a faker and an HTTP session that share one randomly picked locale.

    :param locales: A list of locales to use for the request session and faker instance. If a locale is not available, a similar locale will be attempted.
    :return: the faker (with the credit card and GeoIP providers added) and
        a session whose ``Client-IP``, ``X-Forwarded-For`` and
        ``Accept-Language`` headers follow the chosen locale.
    """
    candidates = [locales] if isinstance(locales, str) else locales

    # locale_code drives the session headers, faker_locale the faker instance.
    locale_code = random.choice(tuple(candidates))
    faker_locale = locale_code
    if faker_locale not in AVAILABLE_LOCALES:
        # Look for a usable locale whose name contains the requested one.
        usable = set(AVAILABLE_LOCALES) - set(BROKEN_LOCALES)
        similar = [name for name in usable if faker_locale in name]
        if len(similar) > 0:
            faker_locale = random.choice(similar)
        else:
            faker_locale = DEFAULT_LOCALE

    fake = ScammerFaker(faker_locale)
    session = ScammerSession()
    fake.add_provider(IINCreditCardProvider)
    fake.add_provider(GeoIP)
    console.log(f"Using {locale_code!r} for country and {faker_locale!r} for faker locale")

    client_ip = fake.country_ipv4(locale_code)
    session.headers["Client-IP"] = client_ip
    forwarded = session.headers.get("X-Forwarded-For")
    if forwarded:
        # Inject client ip in random position
        hops = forwarded.split(",")
        position = random.randint(0, len(hops))
        hops.insert(position, client_ip)
        session.headers["X-Forwarded-For"] = ",".join(hops)

    # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
    language_tag = faker_locale.replace("_", "-")
    session.headers["Accept-Language"] = f"{language_tag};q=0.9, en;q=0.5"
    return fake, session
def introduce_qwerty_typo(s: str, accuracy=0.95) -> str:
    """
    Simulate typing mistakes on a QWERTY keyboard (with å, ö and ä keys).

    Every character that is found on the keyboard rows below is, with
    probability ``1 - accuracy``, replaced by the key directly to its left
    or right in the same row. Upper-case input yields an upper-case
    replacement; all other characters are copied unchanged.
    """
    keyboard_rows = (
        '1234567890+',
        'qwertyuiopå',
        'asdfghjklöä',
        'zxcvbnm,.-',
    )

    # key -> its horizontal neighbours (left neighbour first, if there is one)
    neighbours = {}
    for keys in keyboard_rows:
        last = len(keys) - 1
        for pos, key in enumerate(keys):
            left = keys[pos - 1] if pos > 0 else ''
            right = keys[pos + 1] if pos < last else ''
            neighbours[key] = left + right

    typed = []
    for char in s:
        options = neighbours.get(char.lower())
        if options is None or not random.random() > accuracy:
            typed.append(char)
            continue
        # Randomly take the left or right neighbour; edge keys only have one.
        side = random.randint(0, 1)
        replacement = options[min(len(options) - 1, side)]
        typed.append(replacement.upper() if char.isupper() else replacement)
    return "".join(typed)
# Module-level faker and sample profile; passwd() below reads both.
# The locale is drawn from the same pool init() uses by default, i.e. the
# entries of BROKEN_LOCALES are left out.
locale = random.choice(sorted(set(AVAILABLE_LOCALES) - BROKEN_LOCALES))
f = ScammerFaker(locale)
profile = f.profile()
def passwd(profile=profile):
    """
    Deprecated

    Pick one of four password candidates at random: a generated password, a
    short lower-cased text, a single word, or the profile's username
    followed by the whole birth year, its last two digits or nothing.
    """
    generated = f.password()
    phrase = f.text(max_nb_chars=random.randint(10, 24)).strip(".").lower()
    word = f.word()
    year_part = str(profile["birthdate"].year)[random.choice([0, 2, 4]) :]
    candidates = [generated, phrase, word, profile["username"] + year_part]
    return random.choice(candidates)
import click
@click.command()
@click.argument("script", type=click.Path(exists=True))
@click.option("--mullvad/--no-mullvad", default=True, help="Enable Mullvad check")
@click.option("--slow", default=False, help="Enable slow mode")
def cli(script, mullvad=True, slow=True):
    """
    Run a script and supervise it.
    If the script fails, the delay between requests will increase, and if it fails too many times, it will exit.
    """
    # NOTE(review): --slow is not declared as a flag, so click presumably
    # expects an explicit value (e.g. "--slow true") - confirm this is intended.

    # Status of the last 30 requests
    request_window: List[bool] = []
    # Delay between requests. Increases if there are failures, and resets on success
    delay: float = 0
    # User agent to use for the next script run
    browser: str

    def _run_script(script):
        """Execute the script file once inside this module's global namespace."""
        with open(script, "r") as _fd:
            code = _fd.read()

        g = globals()
        # Used to get the user agent defined in while -loop
        # Not sure IF builtins is needed, but it seems to be so that the script can access the function
        g['__builtins__'] = builtins
        g['__builtins__'].libscammer_get_ua = lambda: browser

        # Add Mullvad check to the script
        if mullvad:
            mullvad_code = r"""
import requests
r = requests.get("https://am.i.mullvad.net/json")
if not r.json()['mullvad_exit_ip']:
    raise Exception("Mullvad is not connected")
"""
            code = mullvad_code + code

        random.seed()
        # The script is chosen by the operator on the command line and is
        # executed with this process's full privileges.
        return exec(code, g)

    while True:
        # Select a random user agent
        browser_idx = random.choices(
            range(0, len(USER_AGENTS)), weights=[b["pct"] for b in USER_AGENTS]
        )[0]
        browser = USER_AGENTS[browser_idx]["ua"]

        # KeyboardInterrupt and SystemExit are not subclasses of Exception, so
        # they leave the loop immediately instead of being delayed by the
        # bookkeeping sleep below.
        try:
            _run_script(script)
        except Exception as e:
            console.log(f"[red]Error[/red]: {e}")
            console.print_exception()
            request_window.append(False)
            delay = min(delay + 0.5, 2.5)
            # Lower the weight of the browser that failed
            USER_AGENTS[browser_idx]["pct"] *= 0.8
            console.log(f"Lowering weight of {browser} to {USER_AGENTS[browser_idx]['pct']}")
        else:
            request_window.append(True)
            delay = 0
            if slow:
                sleep_time = random.randint(5, 120)
                tpl = "[bold green]Sleeping[/bold green] for [bold]{sleep_time}[/bold] seconds"
                with console.status(tpl.format(sleep_time=sleep_time)) as _status:
                    for i in range(sleep_time):
                        _status.update(tpl.format(sleep_time=sleep_time-i))
                        time.sleep(1)

        # Keep only the 30 most recent outcomes before evaluating them.
        del request_window[:-30]

        successes = sum(request_window)
        console.log(
            f"[green]{successes}[/green] successes in the last [green]{len(request_window)}[/green] requests"
        )
        failures = len(request_window) - successes
        sleep_time = delay * failures
        with console.status(
            f"[bold green]Sleeping[/bold green] for [bold]{sleep_time}[/bold] seconds"
        ):
            time.sleep(sleep_time)

        if successes == 0:
            console.log("[red]Too many failures, exiting.[/red]")
            break
# Start the supervisor command line interface when this file is run as a script.
if __name__ == "__main__":
    cli()
"""
libscammer.py - A library for flooding scammers with fake data
This library is used by the scripts in this repository to generate fake data and send it to scammers.
:func:`init()` returns a :class:`Faker` and :class:`requests.Session` instance that can be used to generate fake data and send it to the scammer.
>>> from libscammer import init
>>> faker, session = init(["en_US", "en_GB"])
>>> session.post("https://example.com", data={"name": faker.name()})
To use it as a supervisor, run this file with the path to the script as an argument:
$ python libscammer.py scripts/usps-using-libscammer.py
Supervisor will run the script, and if it fails, it will increase the delay between requests. If the script fails too many times, it will exit.
Data sets used by this library; place them in the same directory as this file:
- https://github.com/binlist/data/raw/master/ranges.csv for credit card numbers.
- https://gist.github.com/phillipj/4944029 or any other text file for polluting data fields. Should be named "alice.txt"
- https://dev.maxmind.com/geoip/geolite2-free-geolocation-data CSV formatted country files for IP addresses.
"""
import ipaddress
from pathlib import Path
import time
from typing import Dict, List, Optional, Set, Tuple
from urllib.parse import urlparse
from faker.providers.credit_card import CardType
import pandas as pd
import requests
from faker import Faker
from faker.config import AVAILABLE_LOCALES, DEFAULT_LOCALE
import random
import builtins
from rich.console import Console
# Locales that are removed from the pool of usable faker locales.
BROKEN_LOCALES = {"es_AR", "fr_QC"}

# Browser user agents with their relative share ("pct"); the share is used as
# the selection weight. Each string must be a complete user agent: a value
# that ends in a truncated version token (for example "Safari/537.3" or
# "Firefox/121.") is an easy fingerprint for the receiving side.
USER_AGENTS = [
    {
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
        "pct": 35.9,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "pct": 15.86,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
        "pct": 13.22,
    },
    {
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15",
        "pct": 12.78,
    },
    {
        "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
        "pct": 10.79,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
        "pct": 2.64,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
        "pct": 2.64,
    },
    {
        "ua": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "pct": 1.76,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
        "pct": 0.88,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0",
        "pct": 0.88,
    },
    {
        "ua": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0",
        "pct": 0.88,
    },
    {
        "ua": "Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "pct": 0.88,
    },
    {
        "ua": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "pct": 0.88,
    },
]
console = Console()

# Pad the pool with ten user agents generated by Faker, each with weight 10.
_ua_source = Faker()
USER_AGENTS.extend(
    {"ua": _ua_source.user_agent(), "pct": 10} for _ in range(10)
)
del _ua_source
def libscammer_get_ua() -> str:
    """
    Return the user agent string a session should present.

    A script (or the supervisor) can pin a specific user agent by installing
    a callable on ``builtins`` under this function's name, for example:

        import builtins
        builtins.libscammer_get_ua = lambda: "Mozilla/5.0 (...)"

    Without a truthy override, one entry of ``USER_AGENTS`` is drawn at
    random, using each entry's ``pct`` value as its weight.
    """
    override = getattr(builtins, "libscammer_get_ua", None)
    if override:
        return override()

    agents = [entry["ua"] for entry in USER_AGENTS]
    weights = [entry["pct"] for entry in USER_AGENTS]
    return random.choices(agents, weights=weights)[0]
class ScammerSession(requests.Session):
    """
    A :class:`requests.Session` that presents randomised client headers.

    On construction the session picks a user agent and a fake ``Client-IP``
    and, when the random hop count is non-zero, an ``X-Forwarded-For`` chain.
    :meth:`post` logs every request, occasionally replaces one form field
    with junk text, and raises when the response ends up on another domain or
    carries an error status.
    """

    def __init__(self):
        super().__init__()
        try:
            user_agent = libscammer_get_ua()
        except NameError:
            # The helper cannot be resolved in this namespace; draw from the
            # weighted pool directly.
            user_agent = random.choices(
                [b["ua"] for b in USER_AGENTS], weights=[b["pct"] for b in USER_AGENTS]
            )[0]

        x_forwarded_for = self._x_forwarded_for()
        # NOTE: the plain dict assigned here replaces whatever headers the
        # base class set up in its own __init__.
        self.headers = {
            "User-Agent": user_agent,
            "Client-IP": Faker().ipv4(),
        }
        if x_forwarded_for:
            self.headers.update({
                "X-Forwarded-For": ','.join(x_forwarded_for),
                "Client-IP": random.choice(x_forwarded_for),
            })

    def _x_forwarded_for(self):
        """Return a list of zero to five random IPv4/IPv6 addresses to use as proxy hops."""
        # From leaked source code, some use x-forwarded-for to get the IP
        hops_num = random.randint(0, 5)
        f = Faker()
        return [random.choice([f.ipv4(), f.ipv6()]) for _ in range(hops_num)]

    def post(self, url, *args, **kwargs):
        """
        POST to ``url`` and return the response.

        With a probability of 12 % one field of the ``data`` keyword argument
        is overwritten with junk text. A default timeout of 20 seconds is
        applied unless the caller passes ``timeout``.

        :raises Exception: if the final response URL is on a different domain
            than ``url`` (i.e. the request was redirected elsewhere).
        :raises requests.HTTPError: if the response status is an error.
        """
        console.log(f"POST {url}")
        console.log(kwargs.get("data", {}))
        if random.random() < 0.12:
            kwargs["data"] = self._pollute_data(kwargs.get("data", {}))
        kwargs.setdefault("timeout", 20)

        r = super().post(url, *args, **kwargs)

        # Compare the domain we posted to with the domain that answered.
        origin_domain = urlparse(url).netloc
        redirect_domain = urlparse(r.url).netloc
        if origin_domain != redirect_domain:
            console.log(r.request.headers)
            raise Exception(f"Redirected to {r.url} from {url}")

        console.log(f" '- {r.url} {r.status_code}")
        if not r.ok:
            print(r.text)
        r.raise_for_status()
        return r

    def _pollute_data(self, data=None):
        """
        Overwrite one randomly chosen field of ``data`` with junk text.

        The junk is a random slice of ``alice.txt`` (looked up next to this
        file) or the string ``"[object Object]"`` when that file is missing
        or empty. The mapping is modified in place and also returned. Empty
        or non-dict payloads are returned untouched.
        """
        if not data or not isinstance(data, dict):
            return data

        data_file = Path(__file__).parent / "alice.txt"
        # Default pollution; the javascript error
        pollution = "[object Object]"
        if data_file.exists():
            with data_file.open("r", encoding="utf-8", errors="replace") as f:
                file_content = f.read()
            file_length = len(file_content)
            if file_length > 0:
                # randrange keeps the start strictly inside the text, so at
                # least one character is always left for the slice length.
                random_start = random.randrange(file_length)
                random_amount = random.randint(1, file_length - random_start)
                pollution = file_content[random_start : random_start + random_amount]

        data_key = random.choice(list(data.keys()))
        data[data_key] = pollution
        console.log(" '- Polluting data field", repr(data_key))
        return data
class ScammerFaker(Faker):
    """
    :class:`Faker` subclass that adjusts a few generators used by the scripts.
    """

    def profile(self, *args, **kwargs):
        """
        Return a faker profile extended with a ``password`` entry.

        The password is chosen at random from a generated password, a short
        lower-cased text, a single word and, when the profile contains both
        ``username`` and ``birthdate``, the username followed by the whole
        birth year, its last two digits or nothing.
        """
        profile = super().__getattr__("profile")(*args, **kwargs)
        # Generate with this instance, not a module-level faker, so the
        # password follows the same locale as the rest of the profile.
        candidates = [
            self.password(),
            self.text(max_nb_chars=self.random_int(10, 24)).strip(".").lower(),
            self.word(),
        ]
        if "username" in profile and "birthdate" in profile:
            year = str(profile["birthdate"].year)
            candidates.append(profile["username"] + year[random.choice([0, 2, 4]) :])
        profile['password'] = random.choice(candidates)
        return profile

    def phone_number(self, *args, **kwargs) -> str:
        """
        Return a phone number, falling back to ``msisdn`` and finally to ten
        random digits when the preceding generator is not usable.
        """
        try:
            return super().__getattr__("phone_number")(*args, **kwargs)
        except AttributeError:
            try:
                return super().__getattr__("msisdn")(*args, **kwargs)
            except ValueError:
                return str(otp(10))

    def password(self, *args, **kwargs) -> str:
        """Return a password; the length defaults to a random value from 6 to 16."""
        kwargs.setdefault("length", self.random_int(6, 16))
        return super().__getattr__("password")(*args, **kwargs)

    def credit_card_number(self, *args, **kwargs):
        """
        Return a card number from ``bank_credit_card`` when that works, and
        from the stock ``credit_card_number`` generator otherwise.
        """
        try:
            cc_info = self.__getattr__("bank_credit_card")(*args, **kwargs)
            return cc_info["number"]
        except Exception as e:
            # Best effort: log the problem and fall back to the stock generator.
            console.log("Failed generating credit card number with iin", e)
            console.print_exception()
        return super().__getattr__("credit_card_number")(*args, **kwargs)
from faker.providers import BaseProvider
from faker.providers.credit_card import Provider as CreditCardProvider
class IINCreditCardProvider(CreditCardProvider):
    """
    Credit card provider that takes card number prefixes from a bank IIN dataset.

    The dataset is the CSV file at :attr:`data_file_ranges`; the columns read
    from it are ``country``, ``scheme``, ``iin_start`` and ``iin_end``.
    """

    data_file_ranges = Path(__file__).parent / "ranges.csv"

    # Maps Faker card type objects to the scheme names used in the dataset.
    card_to_scheme = {
        CreditCardProvider.credit_card_types['visa']: 'visa',
        CreditCardProvider.credit_card_types['mastercard']: 'mastercard',
        CreditCardProvider.credit_card_types['amex']: 'amex',
        CreditCardProvider.credit_card_types['discover']: 'discover',
        CreditCardProvider.credit_card_types['diners']: 'diners',
    }

    def _df(self):
        """
        Lazy load the bank data

        :raises FileNotFoundError: If the dataset file does not exist when it
            is first needed.
        """
        if not hasattr(self, "_bank_df"):
            data_file = Path(self.data_file_ranges)
            if not data_file.exists():
                raise FileNotFoundError(f"Could not find bank data file {data_file}. Download from https://github.com/binlist/data/raw/master/ranges.csv")

            import pandas as pd
            self._bank_df = pd.read_csv(data_file)

        return self._bank_df

    def bank_credit_card(self, card_type: Optional[CardType] = None):
        """ Generate a random credit card number.

        Uses dataset to generate a random credit card number that will pass the iin check.

        :param card_type: Optional card type; when given, only banks issuing
            that scheme are considered.
        :return: The selected dataset row as a dict, with the generated card
            number added under ``number``.
        :raises KeyError: If ``card_type`` resolves to a card that is not in
            :attr:`card_to_scheme`.
        :raises ValueError: If the dataset has no row for the current country
            (and scheme, when requested).
        """
        country_code = self.generator.current_country_code().upper()

        df = self._df()
        candidates = df[df["country"] == country_code]
        if card_type is not None:
            card = self._credit_card_type(card_type)
            card_scheme = self.card_to_scheme[card]
            candidates = candidates[candidates["scheme"] == card_scheme]

        if candidates.empty:
            # Fail with a readable message instead of pandas' sampling error.
            raise ValueError(
                f"No bank data for country {country_code!r}"
                + (f" and scheme {card_scheme!r}" if card_type is not None else "")
            )

        # Blank out NaN so that a missing iin_end is falsy below.
        candidates = candidates.fillna("")

        # select random row
        # TODO: Make deterministic
        bank = candidates.sample(n=1).iloc[0].to_dict()

        # A column containing NaN is read as float; randint() needs real ints.
        iin_start = int(bank["iin_start"])
        iin_end = int(bank["iin_end"] or iin_start)
        prefix = random.randint(iin_start, iin_end)

        if card_type is None:
            # Select card type from card_to_scheme where value matches bank['scheme'].
            # No match leaves None, which is handed to _credit_card_type as-is.
            matching_card = next((k for k, v in self.card_to_scheme.items() if v == bank['scheme']), None)
            card = self._credit_card_type(matching_card)

        bank['number'] = self._generate_number(str(prefix), card.length)

        return bank

    def random_credit_card(self):
        """
        Pick one of the card types listed in :attr:`card_to_scheme` at random.

        :return: The selected card type object (a key of :attr:`card_to_scheme`).
        """
        return self.random_element(tuple(self.card_to_scheme))
class GeoIP(BaseProvider):
    """
    Faker provider that generates IPv4 addresses for a given country.

    Addresses are drawn from the GeoLite2 country CSV files when they are
    present, and from the built-in :attr:`IPV4_RANGES` table otherwise.
    """

    geoip_file_locations = Path(__file__).parent / "GeoLite2-Country-Locations-en.csv"
    "GeoLite2-Country-Locations-en.csv file"

    geoip_file_blocks_ipv4 = Path(__file__).parent / "GeoLite2-Country-Blocks-IPv4.csv"
    "GeoLite2-Country-Blocks-IPv4.csv file"

    IPV4_RANGES: Dict[str, Tuple[int, int]] = {
        'AD': (783081472, 783089663),
        'AE': (1590165504, 1590689791),
        'AF': (2503344128, 2503376895),
        'AG': (637681664, 637689855),
        'AI': (1280094208, 1280096255),
        'AL': (1332346880, 1332412415),
        'AM': (776339456, 776470527),
        'AO': (1772617728, 1773142015),
        'AQ': (1738046976, 1738047487),
        'AR': (3128655872, 3129999359),
        'AS': (1138372608, 1138376703),
        'AT': (1299447808, 1299709951),
        'AU': (24641536, 27262975),
        'AW': (3132948480, 3132964863),
        'AX': (1388814336, 1388822527),
        'AZ': (96403456, 96468991),
        'BA': (531660800, 531693567),
        'BB': (1211324928, 1211333119),
        'BD': (1921122304, 1921187839),
        'BE': (1837503190, 1838153727),
        'BF': (1722941440, 1723138047),
        'BG': (1596588032, 1596719103),
        'BH': (263716864, 263847935),
        'BI': (2591408128, 2591424511),
        'BJ': (2315190272, 2315255807),
        'BL': (394782720, 394782975),
        'BM': (3289137152, 3289153535),
        'BN': (2661351424, 2661416959),
        'BO': (3006922752, 3007053823),
        'BQ': (2701152256, 2701156351),
        'BR': (3010291712, 3015562239),
        'BS': (406011904, 406028287),
        'BT': (1996644352, 1996652543),
        'BV': (3116465152, 3116465407),
        'BW': (2829516800, 2829582335),
        'BY': (2994208768, 2994733055),
        'BZ': (3200581632, 3200614399),
        'CA': (1674575872, 1677721599),
        'CC': (234969856, 234970111),
        'CD': (703791104, 703856639),
        'CF': (3290482688, 3290484735),
        'CG': (2691760128, 2691825663),
        'CH': (959512576, 960561151),
        'CI': (1720188928, 1720451071),
        'CK': (3393265664, 3393273855),
        'CL': (3211788288, 3212312575),
        'CM': (1727266816, 1727528959),
        'CN': (610271232, 618659839),
        'CO': (3045064704, 3047161855),
        'CR': (3384737792, 3385851903),
        'CU': (2563637248, 2563768319),
        'CV': (2774163456, 2774171647),
        'CW': (3193438208, 3193503743),
        'CX': (234969472, 234969855),
        'CY': (530120704, 530186239),
        'CZ': (1521491968, 1522008063),
        'DE': (897238056, 905969663),
        'DJ': (700309504, 700317695),
        'DK': (1462763520, 1463304191),
        'DM': (3470131200, 3470134271),
        'DO': (2561015808, 2561146879),
        'DZ': (694157312, 695205887),
        'EC': (3125018624, 3125280767),
        'EE': (1522401280, 1522532351),
        'EG': (2627731456, 2631033155),
        'EH': (2593522688, 2593522943),
        'ER': (3301466112, 3301470207),
        'ES': (1476395008, 1478492159),
        'ET': (3300767381, 3300917247),
        'FI': (2196111360, 2196439039),
        'FJ': (2423783424, 2423848959),
        'FK': (1347014656, 1347017471),
        'FM': (960937984, 960942079),
        'FO': (1481973760, 1481981951),
        'FR': (1455423488, 1459617791),
        'GA': (698220544, 698351615),
        'GB': (426470231, 436207615),
        'GD': (1249531904, 1249533951),
        'GE': (529661952, 529727487),
        'GF': (1383825920, 1383858175),
        'GG': (1481736192, 1481744383),
        'GH': (2594177024, 2595225599),
        'GI': (1433632768, 1433640959),
        'GL': (1481834496, 1481842687),
        'GM': (2696282112, 2696413183),
        'GN': (3314925568, 3314941951),
        'GP': (1568243712, 1568276479),
        'GQ': (1777065984, 1777070079),
        'GR': (1581309081, 1581776895),
        'GS': (2587963392, 2587963647),
        'GT': (3197392384, 3197470207),
        'GU': (2826633216, 2826698751),
        'GW': (2588490752, 2588491775),
        'GY': (3192913920, 3192946687),
        'HK': (1912340480, 1912602623),
        'HM': (2470577152, 2470577919),
        'HN': (3050438656, 3050504191),
        'HR': (1569193984, 1569718271),
        'HT': (2489745408, 2489778175),
        'HU': (1501822976, 1502085119),
        'ID': (666894336, 671088639),
        'IE': (1461714944, 1462763519),
        'IL': (2218899456, 2219769855),
        'IM': (1406795776, 1406803967),
        'IN': (1977400193, 1979711487),
        'IO': (3391909888, 3391910911),
        'IQ': (636223488, 636485631),
        'IR': (45088768, 46137343),
        'IS': (2194669568, 2194735103),
        'IT': (1325400064, 1329594367),
        'JE': (2784624640, 2784690175),
        'JM': (1209729024, 1209786367),
        'JO': (2954657792, 2954756095),
        'JP': (2231369728, 2248146943),
        'KE': (1764753408, 1765801983),
        'KG': (2662694912, 2662727679),
        'KH': (606437376, 606470143),
        'KI': (1744437248, 1744438271),
        'KM': (1725921280, 1725922303),
        'KN': (3431579648, 3431587839),
        'KP': (2939006976, 2939007999),
        'KR': (3551002624, 3556769791),
        'KW': (623191334, 623378431),
        'KY': (1256079360, 1256087551),
        'KZ': (38273024, 38797311),
        'LA': (1934901248, 1934917631),
        'LB': (2995191808, 2995257343),
        'LC': (1162871296, 1162879999),
        'LI': (1383399424, 1383407615),
        'LK': (1887830016, 1887961087),
        'LR': (693501952, 693510143),
        'LS': (2179465216, 2179497983),
        'LT': (1312369840, 1312817151),
        'LU': (2661679104, 2661875711),
        'LV': (778895360, 778960895),
        'LY': (704380928, 704643071),
        'MA': (1769996288, 1772093439),
        'MC': (1490108416, 1490124799),
        'MD': (1551007744, 1551106047),
        'ME': (533512192, 533528575),
        'MF': (1248913408, 1248915455),
        'MG': (1712324608, 1712717823),
        'MH': (1969707008, 1969709055),
        'MK': (1293680640, 1293811711),
        'ML': (2591457280, 2591473663),
        'MM': (628031488, 628064255),
        'MN': (2090745856, 2090762239),
        'MO': (1022754816, 1022820351),
        'MP': (1701044224, 1701052415),
        'MQ': (1552968704, 1552999935),
        'MR': (700203008, 700219391),
        'MS': (2734472192, 2734473215),
        'MT': (772472832, 772538367),
        'MU': (1718616064, 1719140351),
        'MV': (460488704, 460505087),
        'MW': (1715863552, 1715994623),
        'MX': (3179282432, 3184116735),
        'MY': (2944925696, 2945581055),
        'MZ': (3319398400, 3319529471),
        'NC': (1701117952, 1701134335),
        'NE': (696918016, 696926207),
        'NF': (3406950400, 3406951423),
        'NG': (1768947712, 1769996287),
        'NI': (3125542912, 3125673983),
        'NL': (2441196709, 2446983167),
        'NO': (2338936320, 2339962879),
        'NP': (620494848, 620625919),
        'NR': (3412254720, 3412262911),
        'NU': (832319488, 832319999),
        'NZ': (2050490368, 2051014655),
        'OM': (86245376, 86376447),
        'PA': (3125280768, 3125542911),
        'PE': (3202875392, 3203399679),
        'PF': (2066890752, 2066907135),
        'PG': (2096152576, 2096160767),
        'PH': (1892155392, 1892941823),
        'PK': (656408576, 658505727),
        'PL': (1392508928, 1393623705),
        'PM': (1176764416, 1176768511),
        'PN': (1102388736, 1102388799),
        'PR': (684130304, 684195839),
        'PS': (3164667904, 3164733439),
        'PT': (1441792000, 1442316287),
        'PW': (960950272, 960954367),
        'PY': (3044540416, 3045064703),
        'QA': (78249984, 78381055),
        'RE': (1713569792, 1713635327),
        'RO': (1450704896, 1451220991),
        'RS': (3000762368, 3001024511),
        'RU': (1595408384, 1595998207),
        'RW': (700055552, 700121087),
        'SA': (3157262336, 3157786623),
        'SB': (3389104128, 3389112319),
        'SC': (2598256640, 2598305791),
        'SD': (1719140352, 1719664639),
        'SE': (1373634560, 1374683135),
        'SG': (721420288, 727187455),
        'SH': (3255173120, 3255173631),
        'SI': (3166961664, 3167223807),
        'SJ': (758880000, 758880255),
        'SK': (1315045376, 1315176447),
        'SL': (1720647680, 1720680447),
        'SM': (1505370112, 1505378303),
        'SN': (693239808, 693370879),
        'SO': (2591277056, 2591293439),
        'SR': (3132325888, 3132358655),
        'SS': (1777061888, 1777063935),
        'ST': (961839104, 961843199),
        'SV': (3193307136, 3193438207),
        'SX': (3194355712, 3194363903),
        'SY': (83886080, 83951615),
        'SZ': (693428224, 693436415),
        'TC': (1107243008, 1107247103),
        'TD': (2588180480, 2588188671),
        'TF': (2591589816, 2591589831),
        'TG': (3299344384, 3299606527),
        'TH': (2875195392, 2875719679),
        'TJ': (1426685952, 1426702335),
        'TK': (459282432, 459284479),
        'TL': (3032326144, 3032330239),
        'TM': (1599430656, 1599438847),
        'TN': (3305111552, 3306393528),
        'TO': (960966656, 960970751),
        'TR': (1432346624, 1433353473),
        'TT': (3123445760, 3123576831),
        'TV': (960970752, 960974847),
        'TW': (2019557376, 2021654527),
        'TZ': (2627469312, 2627731455),
        'UA': (1564999680, 1565368447),
        'UG': (1716518912, 1717043199),
        'UM': (1074392944, 1074392951),
        'US': (469762048, 520093695),
        'UY': (2805465088, 2805989375),
        'UZ': (1412841472, 1412857855),
        'VA': (961533952, 961540095),
        'VC': (3485462528, 3485464575),
        'VE': (3200647168, 3201302527),
        'VG': (1112653824, 1112657919),
        'VI': (2464284672, 2464350207),
        'VN': (249561088, 251658239),
        'VU': (960974848, 960978943),
        'WF': (1964253184, 1964255231),
        'WS': (960958464, 960962559),
        'XK': (778266929, 778305535),
        'YE': (2250440704, 2250506239),
        'YT': (1552847360, 1552864255),
        'ZA': (3309305856, 3311589983),
        'ZM': (1720713216, 1721237503),
        'ZW': (699351040, 699368447)
    }
    "Some large IP ranges for each country. This is used if the GeoIP database is not available."

    def _df(self):
        """
        Load the GeoIP data files.

        Download from https://dev.maxmind.com/geoip/geoip2/geolite2/ and place them at
        :attr:`geoip_file_locations` and :attr:`geoip_file_blocks_ipv4`.

        The merged table is cached on the instance after the first successful load.

        :raises FileNotFoundError: If either data file is missing.
        """
        if not hasattr(self, "_geoip_df"):
            missing_files = []
            if not Path(self.geoip_file_locations).exists():
                missing_files.append(self.geoip_file_locations)
            if not Path(self.geoip_file_blocks_ipv4).exists():
                missing_files.append(self.geoip_file_blocks_ipv4)

            if len(missing_files) > 0:
                raise FileNotFoundError(f"Could not find GeoIP data files {missing_files}. Download from https://dev.maxmind.com/geoip/geoip2/geolite2/")

            locations_df = pd.read_csv(self.geoip_file_locations)
            ipv4_df = pd.read_csv(self.geoip_file_blocks_ipv4)

            self._geoip_df = locations_df.merge(ipv4_df, on="geoname_id")

        return self._geoip_df

    @staticmethod
    def _host_bounds(first: int, last: int) -> Tuple[int, int]:
        """Drop the first and last address of an inclusive range when it has room for that."""
        if last - first >= 2:
            return first + 1, last - 1
        return first, last

    @staticmethod
    def _random_ipv4() -> str:
        """Return an arbitrary IPv4 address, excluding 0.0.0.0 and 255.255.255.255."""
        return str(ipaddress.IPv4Address(random.randint(1, 2**32 - 2)))

    def country_ipv4(self, country_code: Optional[str] = None) -> str:
        """
        Generate a random IP address for a country.

        :param country_code: Country code such as ``"US"``, or a locale such as
            ``"en_US"`` / ``"en-US"`` whose second part is used as the country.
            Defaults to the country of the current faker locale.
        :return: Dotted-quad IPv4 address. When no address can be derived for
            the country, an arbitrary address is returned instead.
        """
        if country_code is None:
            country_code = self.generator.current_country_code()

        # Convert country code to locale
        locale_and_country = country_code.upper().replace("-", "_").split("_")
        if len(locale_and_country) == 2:
            country_code = locale_and_country[1]
        country_code = country_code.upper()

        try:
            df = self._df()
        except FileNotFoundError as e:
            # Fallback to the built-in ranges
            console.log("Could not find GeoIP data files.", e, "Using fallback IP ranges")
            bounds = self.IPV4_RANGES.get(country_code)
            if bounds is None:
                # Unknown country: behave like the database path does.
                console.log(f"Could not find IP for country {country_code}")
                return self._random_ipv4()
            low, high = self._host_bounds(*bounds)
            return str(ipaddress.IPv4Address(self.random_int(low, high)))

        df = df[df["country_iso_code"] == country_code]
        df = df[df["network"].notnull()]

        # select random row
        try:
            network = df.sample(n=1)['network'].values[0]

            # generate random IP address in the network
            network_obj = ipaddress.IPv4Network(network)
            low, high = self._host_bounds(
                int(network_obj.network_address), int(network_obj.broadcast_address)
            )
            return str(ipaddress.IPv4Address(random.randint(low, high)))
        except Exception as e:
            # Deliberately broad: no rows for the country, or an unparsable
            # network, must not abort the run.
            console.log(f"Could not find IP for country {country_code}", e)
            return self._random_ipv4()
def otp(length=-1):
    """
    Build a string of random decimal digits, e.g. a fake one-time passcode.

    :param length: Number of digits. Values below 1 select a random length
        between 4 and 8.
    :return: The digits as a string; leading zeros are possible.
    """
    digit_count = length if not length < 1 else random.randint(4, 8)
    return "".join(str(random.randint(0, 9)) for _ in range(digit_count))
def init(locales: Set = set(AVAILABLE_LOCALES) - BROKEN_LOCALES) -> Tuple[ScammerFaker, ScammerSession]:
    """
    Create a faker and a request session that agree on one randomly chosen locale.

    :param locales: Locales to choose from; a single locale may be given as a
        plain string. When the chosen locale is not available in Faker, an
        available locale containing it is used for the faker instead, or the
        Faker default locale when there is none.
    :return: Tuple of the faker instance and the session.
    """
    if isinstance(locales, str):
        locales = [locales]

    # The chosen code drives the session country; the faker may need a substitute.
    locale_code = random.choice(tuple(locales))
    faker_locale = locale_code

    if faker_locale not in AVAILABLE_LOCALES:
        # Look for usable locales that contain the requested one
        usable = set(AVAILABLE_LOCALES) - set(BROKEN_LOCALES)
        similar = [candidate for candidate in usable if faker_locale in candidate]
        if len(similar) > 0:
            faker_locale = random.choice(similar)
        else:
            faker_locale = DEFAULT_LOCALE

    fake = ScammerFaker(faker_locale)
    session = ScammerSession()

    for provider in (IINCreditCardProvider, GeoIP):
        fake.add_provider(provider)

    console.log(f"Using {locale_code!r} for country and {faker_locale!r} for faker locale")

    client_ip = fake.country_ipv4(locale_code)
    session.headers["Client-IP"] = client_ip

    forwarded_for = session.headers.get("X-Forwarded-For")
    if forwarded_for:
        # Slip the client address in among the existing hops at a random position
        hops = forwarded_for.split(",")
        position = random.randint(0, len(hops))
        hops.insert(position, client_ip)
        session.headers["X-Forwarded-For"] = ",".join(hops)

    # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
    language_tag = faker_locale.replace("_", "-")
    session.headers["Accept-Language"] = f"{language_tag};q=0.9, en;q=0.5"

    return fake, session
def introduce_qwerty_typo(s: str, accuracy=0.95) -> str:
    """
    Simulate typing mistakes by replacing characters with a neighbouring key.

    Only characters found on the keyboard rows below can be replaced; each of
    them is replaced when a random draw exceeds ``accuracy``. The replacement
    is the key to the left or right on the same row, upper-cased when the
    original character is upper case.

    :param s: Text to process.
    :param accuracy: Threshold the random draw has to exceed for a typo.
    :return: Text of the same length with some characters replaced.
    """
    keyboard_rows = (
        '1234567890+',
        'qwertyuiopå',
        'asdfghjklöä',
        'zxcvbnm,.-',
    )

    # key -> its left and right neighbours on the same row (one neighbour at row ends)
    neighbours = {}
    for keys in keyboard_rows:
        last_pos = len(keys) - 1
        for pos, key in enumerate(keys):
            left = keys[pos - 1] if pos > 0 else ''
            right = keys[pos + 1] if pos < last_pos else ''
            neighbours[key] = left + right

    typed = []
    for char in s:
        options = neighbours.get(char.lower())
        if options is not None and random.random() > accuracy:
            # Randomly pick left or right; keys at a row end only have one option
            side = random.randint(0, 1)
            wrong_key = options[min(len(options) - 1, side)]
            typed.append(wrong_key.upper() if char.isupper() else wrong_key)
        else:
            typed.append(char)

    return "".join(typed)
# Module-level faker and profile, used by the deprecated passwd() helper.
# Locales listed in BROKEN_LOCALES are skipped, as init() does by default, so
# that importing this module cannot pick a locale that is known not to work.
locale = random.choice(sorted(set(AVAILABLE_LOCALES) - BROKEN_LOCALES))
f = ScammerFaker(locale)
profile = f.profile()
def passwd(profile=profile):
    """
    Deprecated

    Pick a password at random from four candidates: a generated password, a
    short lowercase text, a single word, or the profile's username followed by
    all, the last two, or none of the digits of its birth year.

    :param profile: Mapping with ``username`` and ``birthdate`` entries;
        defaults to the module-level profile.
    """
    generated = f.password()
    lowercase_text = f.text(max_nb_chars=random.randint(10, 24)).strip(".").lower()
    single_word = f.word()
    username_with_year = (
        profile["username"]
        + str(profile["birthdate"].year)[random.choice([0, 2, 4]) :]
    )

    candidates = [generated, lowercase_text, single_word, username_with_year]
    return random.choice(candidates)
import click
@click.command()
@click.argument("script", type=click.Path(exists=True))
@click.option("--mullvad/--no-mullvad", default=True, help="Enable Mullvad check")
# NOTE(review): without is_flag=True this option presumably expects a value
# (e.g. "--slow true") rather than acting as a switch - confirm the intent.
@click.option("--slow", default=False, help="Enable slow mode")
def cli(script, mullvad=True, slow=True):
    """
    Run a script and supervise it.

    If the script fails, the delay between requests will increase, and if it fails too many times, it will exit.
    """
    # Status of the most recent requests (trimmed to about 30 entries below)
    request_window: List[bool] = []

    # Delay between requests. Increases if there are failures, and resets on success
    delay: float = 0

    # User agent to use for the next script run
    browser: str

    def _run_script(script):
        """Read the script and execute it inside this module's global namespace."""
        with open(script, "r") as _fd:
            code = _fd.read()

        g = globals()
        # Used to get the user agent defined in while -loop
        # Not sure IF builtins is needed, but it seems to be so that the script can access the function
        g['__builtins__'] = builtins
        g['__builtins__'].libscammer_get_ua = lambda: browser

        # Add Mullvad check to the script
        if mullvad:
            # Built line by line so the snippet does not depend on how this
            # file itself is indented.
            mullvad_code = (
                "\n"
                "import requests\n"
                'r = requests.get("https://am.i.mullvad.net/json")\n'
                "if not r.json()['mullvad_exit_ip']:\n"
                '    raise Exception("Mullvad is not connected")\n'
            )
            code = mullvad_code + code

        random.seed()
        # SECURITY: exec() runs the given script with full access to this
        # process. Only supervise scripts you trust.
        return exec(code, g)

    while True:
        # Select a random user agent
        browser_idx = random.choices(
            range(0, len(USER_AGENTS)), weights=[b["pct"] for b in USER_AGENTS]
        )[0]
        browser = USER_AGENTS[browser_idx]["ua"]

        # KeyboardInterrupt and SystemExit are not subclasses of Exception, so
        # they propagate from here without being counted, delayed or swallowed.
        try:
            _run_script(script)
        except Exception as e:
            console.log(f"[red]Error[/red]: {e}")
            console.print_exception()
            request_window.append(False)
            delay = min(delay + 0.5, 2.5)

            # Lower the weight of the browser that failed
            USER_AGENTS[browser_idx]["pct"] *= 0.8
            console.log(f"Lowering weight of {browser} to {USER_AGENTS[browser_idx]['pct']}")
        else:
            request_window.append(True)
            delay = 0

            if slow:
                sleep_time = random.randint(5, 120)
                tpl = "[bold green]Sleeping[/bold green] for [bold]{sleep_time}[/bold] seconds"
                with console.status(tpl.format(sleep_time=sleep_time)) as _status:
                    for i in range(sleep_time):
                        _status.update(tpl.format(sleep_time=sleep_time-i))
                        time.sleep(1)

        # Bookkeeping after every run. This is deliberately not a `finally`
        # block: a `break` inside `finally` would discard a pending interrupt.
        successes = sum(request_window)
        console.log(
            f"[green]{successes}[/green] successes in the last [green]{len(request_window)}[/green] requests"
        )
        failures = len(request_window) - successes

        # Back off in proportion to the number of failures in the window
        sleep_time = delay * failures
        with console.status(
            f"[bold green]Sleeping[/bold green] for [bold]{sleep_time}[/bold] seconds"
        ) as _status:
            time.sleep(sleep_time)

        if len(request_window) > 30:
            request_window.pop(0)

        # NOTE(review): this also triggers when the very first run fails,
        # because the window then holds a single failure - confirm the intent.
        if successes == 0:
            console.log("[red]Too many failures, exiting.[/red]")
            break
# Start the supervisor command line interface only when this file is executed
# directly, not when it is imported as a library.
if __name__ == "__main__":
    cli()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment