Skip to content

Instantly share code, notes, and snippets.

@trojblue
Created November 16, 2023 01:10
Show Gist options
  • Save trojblue/06bb89e9f49512d19383ed5f3cfce2eb to your computer and use it in GitHub Desktop.
Save trojblue/06bb89e9f49512d19383ed5f3cfce2eb to your computer and use it in GitHub Desktop.
User (attached file: fevercell_projects.json): Extract all links from twitter.com or x.com from this JSON:
import json
# Function to extract handles from a given domain in a nested dictionary
def extract_handles(data, domain):
    """Collect the handle that follows every occurrence of *domain* in *data*.

    *data* is walked depth-first: dict values and list items are visited
    recursively (at any nesting depth, including lists inside lists), and
    every string found along the way is searched. Non-string scalars
    (numbers, booleans, ``None``) are ignored.

    A "handle" is the run of non-whitespace characters that immediately
    follows *domain*. For example, with ``domain='twitter.com/'`` the string
    ``'see https://twitter.com/alice for more'`` yields ``'alice'``.

    Args:
        data: Decoded JSON value (dict, list, str, or scalar).
        domain: Non-empty marker string that precedes a handle, e.g.
            ``'twitter.com/'``.

    Returns:
        A list of handles in discovery order. Duplicates are kept.

    Raises:
        ValueError: If *domain* is empty and a string is encountered
            (raised by ``str.split``).
    """
    handles = []

    def visit(node):
        if isinstance(node, dict):
            for value in node.values():
                visit(value)
        elif isinstance(node, list):
            for item in node:
                visit(item)
        elif isinstance(node, str):
            # Every piece after the first began right after one occurrence
            # of `domain`, so a string with several links yields several
            # handles.
            for tail in node.split(domain)[1:]:
                tokens = tail.split()
                # Skip occurrences with nothing after the domain (this used
                # to raise IndexError) and ones followed directly by
                # whitespace, where the next word is not a handle.
                if not tokens or tail[0].isspace():
                    continue
                handles.append(tokens[0])

    visit(data)
    return handles
# Function to read JSON and extract handles
def process_json(file_path, domain):
    """Load a JSON file and return the unique handles found after *domain*.

    Args:
        file_path: Path to the JSON file to read.
        domain: Marker string passed through to ``extract_handles``,
            e.g. ``'twitter.com/'``.

    Returns:
        A list of handles with duplicates removed, in first-seen order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; relying on the platform default
    # encoding can mis-decode or fail on non-ASCII content.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    handles = extract_handles(data, domain)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is deterministic from run to run (unlike list(set(...))).
    return list(dict.fromkeys(handles))
# Example usage: replace both placeholder paths below before running.
file_path = '/path/to/your/jsonfile.json'  # Replace with your JSON file path
domain = 'twitter.com/'  # Can be changed to other domains like 'pixiv.net/users/'

# Collect the de-duplicated handles that follow `domain` in the JSON file.
twitter_handles = process_json(file_path, domain)

# Save one handle per line. The encoding is set explicitly so that non-ASCII
# text does not fail on platforms whose default encoding is not UTF-8.
output_file_path = '/path/to/output/file.txt'  # Replace with your desired output file path
with open(output_file_path, 'w', encoding='utf-8') as file:
    file.write('\n'.join(twitter_handles))

# Print the output file path for reference.
print(output_file_path)
# To change the extraction to another domain, just modify the 'domain' variable accordingly.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment