Skip to content

Instantly share code, notes, and snippets.

@jonchurch
Last active January 26, 2025 21:35
Extracting express dep tree for use with files-to-prompt
#!/bin/bash
# Dump the complete production dependency tree of the package in the current
# directory as JSON into deps-tree.json.
#
#   --omit=dev  leave out devDependencies (replaces the deprecated --prod /
#               --production flag; --all below already requires npm 7+)
#   --all       include every nesting level, not only direct dependencies
#   --json      machine-readable output for the Python post-processing step
npm list --omit=dev --all --json > deps-tree.json
import json
# Load the dependency tree JSON generated by npm.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale encoding (which is not UTF-8 everywhere).
with open("deps-tree.json", "r", encoding="utf-8") as f:
    deps_tree = json.load(f)
# Walk the npm tree depth-first and collect each package name once
def extract_dependencies(tree, unique_deps=None):
    """Flatten a nested npm dependency tree into a name -> version mapping.

    Args:
        tree: A node of the ``npm list --json`` output; its children, if any,
            live under the ``"dependencies"`` key.
        unique_deps: Optional dict to accumulate into. It is mutated in place
            and returned; a new dict is created when omitted.

    Returns:
        The accumulator dict. The first version encountered for a package
        name wins; nodes without a ``"version"`` are recorded as
        ``"unknown"``.
    """
    collected = {} if unique_deps is None else unique_deps

    # Explicit stack of child iterators: visits nodes in the same pre-order
    # a recursive walk would, without using the call stack.
    pending = [iter(tree.get("dependencies", {}).items())]
    while pending:
        entry = next(pending[-1], None)
        if entry is None:
            # Current level exhausted; resume the parent level.
            pending.pop()
            continue
        name, details = entry
        collected.setdefault(name, details.get("version", "unknown"))
        # Descend into this package's own dependencies before its siblings.
        pending.append(iter(details.get("dependencies", {}).items()))

    return collected
# Flatten the npm tree into a single name -> version mapping
unique_dependencies = extract_dependencies(deps_tree)

# Persist the flattened mapping as pretty-printed JSON
with open("deduped-deps.json", "w") as out_file:
    out_file.write(json.dumps(unique_dependencies, indent=2))

# Report how many distinct packages were found
print(f"Extracted {len(unique_dependencies)} unique dependencies. Saved to deduped-deps.json.")
import json
# Load the dependency tree JSON generated by npm.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale encoding (which is not UTF-8 everywhere).
with open("deps-tree.json", "r", encoding="utf-8") as f:
    deps_tree = json.load(f)
# Recursively gather every package name found anywhere in the npm tree
def extract_dependencies(tree, unique_deps=None):
    """Collect each package in an npm dependency tree exactly once.

    Args:
        tree: A node of the ``npm list --json`` output; child packages are
            read from its ``"dependencies"`` key when present.
        unique_deps: Optional accumulator dict, updated in place. A fresh
            dict is used when it is ``None``.

    Returns:
        The accumulator, mapping package name to the version of its first
        occurrence (``"unknown"`` when the node carries no ``"version"``).
    """
    if unique_deps is None:
        unique_deps = {}

    # Leaf node: nothing below it to record.
    if "dependencies" not in tree:
        return unique_deps

    for pkg_name, node in tree["dependencies"].items():
        # setdefault keeps the version of the first occurrence.
        unique_deps.setdefault(pkg_name, node.get("version", "unknown"))
        # Descend into this package's own dependencies.
        extract_dependencies(node, unique_deps)

    return unique_deps
# Reduce the nested tree to one entry per package name
unique_dependencies = extract_dependencies(deps_tree)

# Save the result as indented JSON next to the input file
with open("deduped-deps.json", "w") as sink:
    serialized = json.dumps(unique_dependencies, indent=2)
    sink.write(serialized)

# Summarise the run for the user
print(f"Extracted {len(unique_dependencies)} unique dependencies. Saved to deduped-deps.json.")
{
"accepts": "https://github.com/jshttp/accepts",
"mime-types": "https://github.com/jshttp/mime-types",
"negotiator": "https://github.com/jshttp/negotiator",
"body-parser": "https://github.com/expressjs/body-parser",
"bytes": "https://github.com/visionmedia/bytes.js",
"content-type": "https://github.com/jshttp/content-type",
"debug": "git://github.com/debug-js/debug",
"ms": "https://github.com/vercel/ms",
"destroy": "https://github.com/stream-utils/destroy",
"http-errors": "https://github.com/jshttp/http-errors",
"iconv-lite": "git://github.com/ashtuchkin/iconv-lite",
"safer-buffer": "https://github.com/ChALkeR/safer-buffer",
"on-finished": "https://github.com/jshttp/on-finished",
"qs": "https://github.com/ljharb/qs",
"raw-body": "https://github.com/stream-utils/raw-body",
"unpipe": "https://github.com/stream-utils/unpipe",
"type-is": "https://github.com/jshttp/type-is",
"media-typer": "https://github.com/jshttp/media-typer",
"mime-db": "https://github.com/jshttp/mime-db",
"content-disposition": "https://github.com/jshttp/content-disposition",
"safe-buffer": "git://github.com/feross/safe-buffer",
"cookie-signature": "https://github.com/visionmedia/node-cookie-signature",
"cookie": "https://github.com/jshttp/cookie",
"encodeurl": "https://github.com/pillarjs/encodeurl",
"escape-html": "https://github.com/component/escape-html",
"etag": "https://github.com/jshttp/etag",
"finalhandler": "https://github.com/pillarjs/finalhandler",
"parseurl": "https://github.com/pillarjs/parseurl",
"statuses": "https://github.com/jshttp/statuses",
"fresh": "https://github.com/jshttp/fresh",
"depd": "https://github.com/dougwilson/nodejs-depd",
"inherits": "git://github.com/isaacs/inherits",
"setprototypeof": "https://github.com/wesleytodd/setprototypeof",
"toidentifier": "https://github.com/component/toidentifier",
"merge-descriptors": "https://github.com/sindresorhus/merge-descriptors",
"ee-first": "https://github.com/jonathanong/ee-first",
"once": "git://github.com/isaacs/once",
"wrappy": "https://github.com/npm/wrappy",
"proxy-addr": "https://github.com/jshttp/proxy-addr",
"forwarded": "https://github.com/jshttp/forwarded",
"ipaddr.js": "git://github.com/whitequark/ipaddr.js",
"side-channel": "https://github.com/ljharb/side-channel",
"es-errors": "https://github.com/ljharb/es-errors",
"object-inspect": "git://github.com/inspect-js/object-inspect",
"side-channel-list": "https://github.com/ljharb/side-channel-list",
"side-channel-map": "https://github.com/ljharb/side-channel-map",
"call-bound": "https://github.com/ljharb/call-bound",
"call-bind-apply-helpers": "https://github.com/ljharb/call-bind-apply-helpers",
"function-bind": "https://github.com/Raynos/function-bind",
"get-intrinsic": "https://github.com/ljharb/get-intrinsic",
"es-define-property": "https://github.com/ljharb/es-define-property",
"es-object-atoms": "https://github.com/ljharb/es-object-atoms",
"get-proto": "https://github.com/ljharb/get-proto",
"dunder-proto": "https://github.com/es-shims/dunder-proto",
"gopd": "https://github.com/ljharb/gopd",
"has-symbols": "git://github.com/inspect-js/has-symbols",
"hasown": "https://github.com/inspect-js/hasOwn",
"math-intrinsics": "https://github.com/es-shims/math-intrinsics",
"side-channel-weakmap": "https://github.com/ljharb/side-channel-weakmap",
"range-parser": "https://github.com/jshttp/range-parser",
"router": "https://github.com/pillarjs/router",
"array-flatten": "git://github.com/blakeembrey/array-flatten",
"is-promise": "https://github.com/then/is-promise",
"methods": "https://github.com/jshttp/methods",
"path-to-regexp": "https://github.com/pillarjs/path-to-regexp",
"utils-merge": "git://github.com/jaredhanson/utils-merge",
"send": "https://github.com/pillarjs/send",
"serve-static": "https://github.com/expressjs/serve-static",
"vary": "https://github.com/jshttp/vary"
}
import json
import subprocess
import shutil
from pathlib import Path
def extract_repo_path(url):
    """Return the part of a GitHub URL after ``github.com/``, or ``None``.

    Args:
        url: Repository URL as stored in ``repo-urls.json``.

    Returns:
        The text following ``github.com/`` with trailing slashes and one
        literal ``.git`` suffix removed (e.g. ``"jshttp/etag"``). URLs that
        mention ``github.com`` without a following ``/`` are returned whole,
        minus the same trailing slashes and suffix. ``None`` is returned
        when the URL does not mention ``github.com`` at all.
    """
    if "github.com" not in url:
        return None
    repo_path = url.split("github.com/")[-1].rstrip("/")
    # str.rstrip(".git") strips any trailing run of the characters
    # '.', 'g', 'i', 't' -- turning "debug" into "debu" and "etag" into
    # "eta" -- so remove the literal suffix instead.
    if repo_path.endswith(".git"):
        repo_path = repo_path[: -len(".git")]
    return repo_path


def main():
    """Clone every repository in ``repo-urls.json`` and run files-to-prompt.

    For each ``package -> url`` entry the repository is cloned with the
    ``gh`` CLI into ``temp-repo``, passed to ``files-to-prompt`` with
    ``processed-files/<package>.txt`` as the output file, and the clone is
    then deleted. Entries whose clone or processing step fails are reported
    and skipped.
    """
    # Load repository URLs from JSON
    with open("repo-urls.json", "r", encoding="utf-8") as f:
        repo_urls = json.load(f)

    # Directory to store processed files
    output_dir = Path("processed-files")
    output_dir.mkdir(exist_ok=True)

    # Temporary directory for cloning repositories
    temp_repo_dir = Path("temp-repo")

    # Process each repository
    for package, url in repo_urls.items():
        # Placeholder values written by the URL-lookup step
        if url in ["No repository URL found", "Error fetching URL"]:
            print(f"Skipping {package}: {url}")
            continue

        print(f"Cloning {package} from {url}...")

        # Extract the owner/repo path from the GitHub URL
        repo_path = extract_repo_path(url)
        if repo_path is None:
            print(f"Invalid GitHub URL for {package}: {url}")
            continue

        # A directory left behind by an interrupted earlier run would make
        # the clone fail, so start from a clean slate.
        shutil.rmtree(temp_repo_dir, ignore_errors=True)

        # Clone the repository using `gh repo clone`
        try:
            subprocess.run(
                ["gh", "repo", "clone", repo_path, str(temp_repo_dir)],
                check=True,
            )
        except subprocess.CalledProcessError:
            print(f"Failed to clone {package} using gh CLI. Skipping...")
            # Remove any partial clone so it cannot block the next package.
            shutil.rmtree(temp_repo_dir, ignore_errors=True)
            continue

        # Process the repository with files-to-prompt
        print(f"Processing {package}...")
        output_file = output_dir / f"{package}.txt"
        try:
            subprocess.run(
                ["files-to-prompt", str(temp_repo_dir), "--output", str(output_file)],
                check=True,
            )
        except subprocess.CalledProcessError:
            print(f"Failed to process {package}. Skipping...")
        else:
            print(f"Cleaning up {package}...")
        finally:
            # Delete the temporary clone whether or not processing succeeded.
            shutil.rmtree(temp_repo_dir, ignore_errors=True)

    print("All repositories processed.")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment