Skip to content

Instantly share code, notes, and snippets.

@DanielOX
Last active January 14, 2024 12:38
Show Gist options
  • Save DanielOX/c03dd64cacc030d3a7d66407993d5fd8 to your computer and use it in GitHub Desktop.
Save DanielOX/c03dd64cacc030d3a7d66407993d5fd8 to your computer and use it in GitHub Desktop.
EasyPaisa Text Messages Parser in Python
import pandas as pd
import re
# Path of the Easypaisa archived message text file
easypaisa_file = './easypaisa.txt'

# Read the whole archive in one go. The explicit encoding keeps the result
# independent of the platform's default locale.
# NOTE(review): assumes the export is UTF-8 encoded - confirm against a real file.
with open(easypaisa_file, encoding='utf-8') as f:
    data = f.read()

# Messages are separated by one or more blank lines. Keep only the ones that
# contain digits followed by a dot, mention "Received", and do not mention
# "cashback".
transaction_message = [
    sms
    for sms in re.split(r"(?:\r?\n){2,}", data.strip())
    if re.search(r'\d+\.', sms) and 'cashback' not in sms and "Received" in sms
]
# Pre-Processing
def clean_date(date):
    """Normalise the bracketed text captured as a message's date.

    Args:
        date: Raw matched text (for example ``"[some date text]"``), or the
            ``"NULL"`` placeholder used when nothing was matched.

    Returns:
        The lower-cased text without square brackets or surrounding
        whitespace, or ``"NULL"`` when the input is the placeholder or
        nothing is left after cleaning.
    """
    # Pass the placeholder through untouched so a missing date is spelled
    # "NULL", the same way the other cleaners spell a missing value.
    if date == "NULL":
        return "NULL"
    cleaned = date.lower().replace('[', '').replace(']', '').strip()
    return cleaned or "NULL"
def clean_trx(trx):
    """Reduce a matched ``"Trx ID <digits>."`` fragment to the bare ID.

    Args:
        trx: Raw matched text, or the ``"NULL"`` placeholder used when
            nothing was matched.

    Returns:
        The lower-cased text with the ``"trx id"`` label, every dot and the
        surrounding whitespace removed, or ``"NULL"`` when the input is the
        placeholder or nothing is left after cleaning.
    """
    # Pass the placeholder through untouched; lower-casing it would turn the
    # missing-value marker into "null".
    if trx == "NULL":
        return "NULL"
    cleaned = trx.lower().replace('trx id', '').replace('.', '').strip()
    return cleaned or "NULL"
def clean_amount(amount):
    """Extract the numeric amount from a matched ``"Received Rs ... from"`` fragment.

    Args:
        amount: Raw matched text, or the ``"NULL"`` placeholder used when
            nothing was matched.

    Returns:
        The digits and dots found in the text, with one leading dot dropped,
        or ``"NULL"`` when nothing numeric remains.
    """
    # Keep only digits and dots; this also drops thousands separators.
    numeric = re.sub(r'[^\d.]', '', amount)
    # The dot of an "Rs." prefix survives the filter above; drop it.
    if numeric.startswith('.'):
        numeric = numeric[1:]
    # Check for emptiness only after removing the dot, so a lone "." is
    # reported as missing instead of as an empty string.
    return numeric or "NULL"
def clean_sender(sender):
    """Keep only the digits of a matched ``"from <name> <number> "`` fragment.

    Args:
        sender: Raw matched text, or the ``"NULL"`` placeholder used when
            nothing was matched.

    Returns:
        Every digit in the text concatenated in order, or ``"NULL"`` when the
        text contains no digits. Digits that appear inside the name part are
        kept as well.
    """
    # A raw-string pattern avoids the invalid "\d" escape of a plain literal.
    digits = re.sub(r'\D', '', sender)
    return digits or "NULL"
# Check if found else return NULL
def function_extract(reg):
    """Return the text matched by ``reg``, or ``"NULL"`` when there is no match.

    Args:
        reg: The result of a regex search; a falsy value (such as ``None``)
            means the pattern was not found.

    Returns:
        ``reg.group()`` for a successful search, otherwise ``"NULL"``.
    """
    if reg:
        return reg.group()
    return "NULL"
# Iterate through the text messages and extract the fields of each one.
# The patterns are the same for every message, so they are compiled once here
# instead of on every iteration.
# NOTE(review): under re.X the unescaped spaces in the second alternative of
# date_r are ignored, so it matches "TrxID<digits>." rather than
# "Trx ID <digits>." - confirm whether that fallback is intended.
date_r = re.compile(r'\[.*\] | Trx ID \d+\.', flags=re.I | re.X)
trx_r = re.compile(r'Trx\ ID\ \d+\.', flags=re.I | re.X)
amount_r = re.compile(r"Received Rs.?\ \d.*\ from")
sender_r = re.compile(r"from \w.*\ \d+\ ")
sender_mobile_r = re.compile(r"\d+")  # not used below; kept for later analysis code

# The result list has to exist before the loop appends to it; without this
# definition the script stops with a NameError.
transactions = []
for temp in transaction_message:
    date = clean_date(function_extract(date_r.search(temp)))
    tid = clean_trx(function_extract(trx_r.search(temp)))
    amount = clean_amount(function_extract(amount_r.search(temp)))
    sender = clean_sender(function_extract(sender_r.search(temp)))
    # Store one dict per message for later use in the DataFrame
    transactions.append({
        "date": date,
        "tid": tid,
        "amount": amount,
        "sender": sender,
    })

# Convert to DataFrame
df = pd.DataFrame(transactions)
# Perform Analysis on DF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment