Skip to content

Instantly share code, notes, and snippets.

@Tuhin-thinks
Created September 28, 2023 14:27
Show Gist options
  • Save Tuhin-thinks/1a08c08f20b034a40c8df885e0caef61 to your computer and use it in GitHub Desktop.
Save Tuhin-thinks/1a08c08f20b034a40c8df885e0caef61 to your computer and use it in GitHub Desktop.
Expand any LinkedIn-encoded (shortened) URL to its actual URL and save the results as a CSV file.
import csv
from typing import Dict, List
import re
import requests
from requests.exceptions import RequestException
def expand_url(url, timeout=10):
    """Resolve a redirecting URL to its final destination.

    Sends a HEAD request that follows redirects and returns the URL of the
    final response. This is a best-effort helper: if the request fails, the
    error is printed and the original ``url`` is returned unchanged.

    Args:
        url: The URL to expand.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a host that
            never answers.

    Returns:
        The final URL after following redirects, or ``url`` itself when the
        request raised a ``RequestException``.
    """
    try:
        resp = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Deliberate best-effort: report the problem and fall back to the input.
        print(e)
        return url
    return resp.url
def read_text_file(file_path, encoding='utf-8'):
    """Return the entire contents of a text file as a single string.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The decoded file contents.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as f:
        return f.read()
def parse_urls_from_text(text):
    """Find every ``<digit>) <title>: <url>`` entry in *text*.

    Each line is searched independently (multiline mode). The title is the
    shortest run of characters before the first ``": "`` separator and the
    URL is everything after it up to the end of the line.

    Returns:
        An iterator of ``re.Match`` objects exposing the named groups
        ``title`` and ``url``.
    """
    entry_regex = re.compile(r'\d\) (?P<title>.+?)\: (?P<url>.+)$', re.MULTILINE)
    return entry_regex.finditer(text)
def expand_all_urls(file_path='system-design-45urls.txt') -> List[Dict]:
    """Read the listed URLs from a text file and expand each one.

    The file is read with ``read_text_file``, its entries are extracted with
    ``parse_urls_from_text``, and every URL (stripped of surrounding
    whitespace) is passed through ``expand_url``.

    Args:
        file_path: Path of the text file holding the numbered
            ``title: url`` entries. Defaults to the file name the script
            originally hard-coded.

    Returns:
        A list of dicts with the keys ``title`` and ``url``, where ``url`` is
        the value returned by ``expand_url``. Entries whose expansion raised
        a ``RequestException`` are reported and left out.
    """
    text = read_text_file(file_path)
    expanded_url_list = []
    for index, match in enumerate(parse_urls_from_text(text), 1):
        entry = match.groupdict()
        raw_url = entry['url'].strip()
        try:
            # Only the network call can raise; keep the guarded region minimal.
            entry['url'] = expand_url(raw_url)
        except RequestException:
            # Safeguard in case expand_url lets a request error propagate;
            # report the offending URL and skip the entry.
            print(f"Failed to expand URL ({index}): ", raw_url)
            continue
        expanded_url_list.append(entry)
    return expanded_url_list
def save_as_csv(expanded_url_list: List[Dict[str, str]],
                file_path='system-design-45urls-expanded.csv'):
    """Write the expanded URL entries to a CSV file with a header row.

    Args:
        expanded_url_list: Rows to write; each dict must contain exactly the
            keys ``title`` and ``url``.
        file_path: Destination CSV path. Defaults to the file name the script
            originally hard-coded.

    Raises:
        ValueError: If a row contains a key other than ``title``/``url``
            (raised by ``csv.DictWriter``).
        OSError: If the destination cannot be opened for writing.
    """
    # newline='' lets the csv module control line endings itself; without it
    # an extra blank line appears after every row on Windows. UTF-8 keeps
    # non-ASCII titles writable regardless of the platform locale.
    with open(file_path, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['title', 'url']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(expanded_url_list)
    print("writing complete ✅")
if __name__ == '__main__':
    # Script entry point: expand every URL from the input file, then write
    # the resulting rows out as CSV.
    save_as_csv(expand_all_urls())
@Tuhin-thinks
Copy link
Author

If you're too lazy to run this script, here's the output CSV for you:
https://file.io/yHdj5SVXa6CR

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment