Skip to content

Instantly share code, notes, and snippets.

@fulcrum6378
Last active April 4, 2023 00:51
Show Gist options
  • Save fulcrum6378/f287d776a5882eb6f9064e455ff0bdb4 to your computer and use it in GitHub Desktop.
Save fulcrum6378/f287d776a5882eb6f9064e455ff0bdb4 to your computer and use it in GitHub Desktop.
Reorganiser for the exported followers_and_following page of Instagram! (change the timezone info to your own)
import os
import sys
from datetime import datetime
from typing import Optional
from bs4 import BeautifulSoup
from persiantools.digits import fa_to_en
from persiantools.jdatetime import JalaliDateTime
from pytz import timezone
# Without a target argument there is nothing to process: print the usage text
# and stop before any file is touched.
if len(sys.argv) <= 1:
    print("""
HOW-TO:
$ python ig_follow_lists.py <FILENAME|all>
For processing "all" the files in a folder, execute this command before that:
$ cd /path/to/folder
""")
    # sys.exit() rather than quit(): quit() is injected by the `site` module
    # for interactive sessions and is not guaranteed to exist in every run
    # mode. The exit status stays the same (success).
    sys.exit()
# org_zone: the zone the exported timestamps are read in.
# dst_zone: the zone the reorganised output is converted to.
# pytz is used on purpose for Iran: per the original author, it reflects the
# recent changes to Iran's daylight-saving rules.
org_zone, dst_zone = timezone("US/Pacific"), timezone("Asia/Tehran")
# Month-name -> month-number lookup tables (1 = January ... 12 = December).
# `months` is keyed by the English three-letter abbreviations.
months = dict(zip(
    ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
    range(1, 13),
))
# `months_persian` is keyed by the Persian spellings of the Gregorian months.
months_persian = dict(zip(
    ("ژانویه", "فوریه", "مارس", "آوریل", "مه", "ژوئن",
     "ژوئیه", "اوت", "سپتامبر", "اکتبر", "نوامبر", "دسامبر"),
    range(1, 13),
))
# Decide which files to process: every regular *.html file in the current
# working directory for "all", otherwise just the single path that was given.
if sys.argv[1] == 'all':
    # isfile() skips directories whose name happens to end in ".html"; they
    # would otherwise make the later open() call fail.
    files = [
        name for name in os.listdir()
        if name.endswith('.html') and os.path.isfile(name)
    ]
else:
    files = [sys.argv[1]]
def _extract_entry(div, source: str, index: int) -> tuple:
    """Pull the username and the raw timestamp text out of one entry <div>.

    Two layouts are recognised, told apart by the number of direct children:
    two children (most exported pages) or three (the 'blocked_accounts' page).

    Args:
        div: one 'uiBoxWhite' <div> element.
        source: name of the file being processed (used in the error message).
        index: position of the <div> within that file (used in the error message).

    Returns:
        A ``(username, raw_timestamp)`` pair.

    Raises:
        ValueError: if the <div> has any other number of children.
    """
    children = div.contents
    if len(children) == 2:  # the rest of the pages
        content = children[0].contents[0].contents
        username = content[0].contents[0].string
    elif len(children) == 3:  # the 'blocked_accounts' page
        content = children[1].contents[0].contents
        username = children[0].string
    else:
        raise ValueError("UNKNOWN DIV TYPE: " + source + " : " + str(index)
                         + " contents=" + str(len(children)))
    return username, content[1].string


def _parse_timestamp(raw: str, persian: bool) -> datetime:
    """Turn an exported timestamp string into an aware datetime in dst_zone.

    The text is split as "<day> <month> <year><sep><hour>:<minute>", where
    <sep> is ", " for English exports and "، " for Persian ones. The wall-clock
    value is interpreted in org_zone and then converted to dst_zone.
    """
    if persian:
        # Strip the invisible RIGHT-TO-LEFT MARKs (U+200F) and convert the
        # Persian digits so that int() can parse them.
        raw = fa_to_en(raw.replace("\u200f", ""))
    raw_date, raw_time = raw.split("، " if persian else ", ")
    day, month_name, year = raw_date.split(" ")[:3]
    hour, minute = raw_time.split(":")[:2]
    month = (months_persian if persian else months)[month_name]
    naive = datetime(int(year), month, int(day), int(hour), int(minute))
    # pytz zones must be attached with localize(): handing one to the datetime
    # constructor as tzinfo= silently applies the zone's historical LMT offset
    # and shifts every converted time.
    return org_zone.localize(naive).astimezone(dst_zone)


def _format_line(username: str, dt: datetime) -> str:
    """Build one output row: padded username, Gregorian date, Jalali date, time."""
    jdt = JalaliDateTime(dt)
    # NOTE(review): the Jalali year is printed shifted by 5000, exactly as the
    # original script did; the reason is not visible here - confirm the intent.
    return (
        f"{username.ljust(30)}"
        f"{dt.year:04d}.{dt.month:02d}.{dt.day:02d} - "
        f"{jdt.year + 5000:04d}.{jdt.month:02d}.{jdt.day:02d}"
        f" - {dt.hour:02d}:{dt.minute:02d} {dt.tzname()}\n"
    )


# Language of the export; detected once, from the first timestamp that can be
# read, and then reused for every remaining entry and file.
is_persian: Optional[bool] = None
for f in files:
    with open(f, 'r', encoding='utf-8') as html_file:
        html: BeautifulSoup = BeautifulSoup(html_file.read(), 'html.parser')
    divs = html.body.find_all('div', attrs={'class': 'uiBoxWhite'})
    del html  # the full parse tree is no longer needed; keep only the entries

    lines = []
    errored = 0
    for iDiv, div in enumerate(divs):
        try:
            username, raw_datetime = _extract_entry(div, f, iDiv)
            if is_persian is None:
                is_persian = any(mp in raw_datetime for mp in months_persian)
            # The row is appended only once it is complete, so a failing entry
            # never leaves a dangling username in the output.
            lines.append(_format_line(username, _parse_timestamp(raw_datetime, is_persian)))
        except Exception:
            # Best effort per entry: a malformed entry is counted and skipped.
            # Unlike a bare `except:`, Ctrl-C and SystemExit still propagate.
            errored += 1
    if errored > 0:
        print(errored, "invalid objects!")

    # splitext() also copes with names that have no extension at all.
    with open(os.path.splitext(f)[0] + ".txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write("".join(lines))
    print(f, "is done.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment