Skip to content

Instantly share code, notes, and snippets.

@maluta
Created May 17, 2025 21:55
Show Gist options
  • Save maluta/f7ea9f1f56463cfb66ea3ff9a3c0d2ba to your computer and use it in GitHub Desktop.
Save maluta/f7ea9f1f56463cfb66ea3ff9a3c0d2ba to your computer and use it in GitHub Desktop.
import zipfile
import os
import tempfile
import re
from datetime import datetime
import time
import argparse
def extract_txt_from_zip(zip_file, start_date, end_date):
    """Unpack a chat archive and return its messages within a date range.

    The archive is unpacked into a temporary directory that is removed
    again before this function returns.

    Args:
        zip_file: Path to (or file object of) the zip archive.
        start_date: Inclusive lower bound, forwarded to
            ``read_and_filter_txt``.
        end_date: Inclusive upper bound, forwarded to
            ``read_and_filter_txt``.

    Returns:
        Whatever ``read_and_filter_txt`` returns for the text file found
        in the archive, or ``None`` when no text file path was obtained or
        a ``zipfile.BadZipFile`` was raised.
    """
    try:
        with tempfile.TemporaryDirectory() as scratch_dir:
            chat_path = extract_files_from_zip(zip_file, scratch_dir)
            # Guard clause: nothing usable came out of the archive.
            if not chat_path:
                return None
            return read_and_filter_txt(chat_path, start_date, end_date)
    except zipfile.BadZipFile:
        return None
def extract_files_from_zip(zip_file, extract_to):
    """Extract the first ``.txt`` member of a zip archive and return its path.

    Only the selected text file is written to disk; other members (for
    example media attachments) are left inside the archive. "First" means
    first in archive order, so the choice is deterministic.

    Args:
        zip_file: Path to (or file object of) the zip archive.
        extract_to: Directory the text file is extracted into.

    Returns:
        The filesystem path of the extracted ``.txt`` file, or ``None`` if
        the archive holds no such member or could not be read.
    """
    try:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            for member in zip_ref.infolist():
                if member.is_dir() or not member.filename.endswith('.txt'):
                    continue
                # ZipFile.extract returns the path it actually created.
                return zip_ref.extract(member, extract_to)
    except Exception:
        # Deliberate best-effort: any failure to open or unpack the archive
        # is reported to the caller as "no text file found".
        return None
    return None
def read_and_filter_txt(txt_file_path, start_date, end_date):
    """Return the lines of a chat log whose message date is within a range.

    A line that starts with ``DD/MM/YY,`` begins a new message; following
    lines without such a prefix are treated as continuations and share the
    in-range/out-of-range decision of the message they belong to. The
    filtered text is also printed to stdout, preceded by a ``>`` line.

    Args:
        txt_file_path: Path of the text file to read.
        start_date: Inclusive lower bound as a ``DD/MM/YY`` string.
        end_date: Inclusive upper bound as a ``DD/MM/YY`` string.

    Returns:
        The concatenated matching lines (possibly an empty string), or
        ``None`` if the file could not be decoded with any tried encoding.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not ``DD/MM/YY``.
    """
    # Parse the bounds once, outside the encoding loop. Rebinding the
    # parameters inside the loop would hand a datetime to strptime on the
    # second encoding attempt and raise TypeError.
    range_start = datetime.strptime(start_date, "%d/%m/%y")
    range_end = datetime.strptime(end_date, "%d/%m/%y")
    date_prefix = re.compile(r"(\d{2}/\d{2}/\d{2}),")

    # utf-8-sig reads plain UTF-8 too, and strips a leading BOM that would
    # otherwise stop the first line from matching the date prefix.
    for encoding in ['utf-8-sig', 'utf-16', 'latin-1']:
        kept_lines = []
        include_message = False
        try:
            with open(txt_file_path, "r", encoding=encoding) as file:
                for line in file:
                    match = date_prefix.match(line)
                    if match:
                        try:
                            message_date = datetime.strptime(
                                match.group(1), "%d/%m/%y"
                            )
                        except ValueError:
                            # Date-shaped text that is not a real date
                            # (e.g. "99/99/99,"): treat as a continuation.
                            message_date = None
                        if message_date is not None:
                            include_message = (
                                range_start <= message_date <= range_end
                            )
                    if include_message:
                        kept_lines.append(line)
        except UnicodeDecodeError:
            # Wrong guess; discard partial results and try the next one.
            continue
        trimmed_text = "".join(kept_lines)
        print(">")
        print(trimmed_text)
        return trimmed_text
    return None
def main():
    """Command-line entry point.

    Parses ``--start_date``, ``--end_date`` and ``--zip_path`` and passes
    them to ``extract_txt_from_zip``.

    Raises:
        SystemExit: With an error message (exit status 1) when
            ``extract_txt_from_zip`` returns ``None``, so a failed run is
            not mistaken for an empty result.
    """
    parser = argparse.ArgumentParser(description='Extract')
    parser.add_argument('--start_date', type=str, required=True, help='Start date in DD/MM/YY format')
    parser.add_argument('--end_date', type=str, required=True, help='End date in DD/MM/YY format')
    parser.add_argument('--zip_path', type=str, required=True, help='Path to the WhatsApp chat zip file')
    args = parser.parse_args()

    filtered_text = extract_txt_from_zip(args.zip_path, args.start_date, args.end_date)
    # None signals failure; an empty string is a valid "no messages" result.
    if filtered_text is None:
        raise SystemExit(
            "error: could not read a chat .txt file from {!r}".format(args.zip_path)
        )


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment