Skip to content

Instantly share code, notes, and snippets.

@JeelPatel231
Created August 16, 2022 17:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JeelPatel231/ed6c91921cd76f3bde1173f3c730d85e to your computer and use it in GitHub Desktop.
Save JeelPatel231/ed6c91921cd76f3bde1173f3c730d85e to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
from urllib import request
import json
# Base URL that loop_child() prepends to every path it fetches.
# Left empty here; presumably meant to be filled in with the target site's
# origin (scheme + host, no trailing slash) before running — TODO confirm.
HOST: str = ""
# NOTE(review): not referenced anywhere in the visible code.
folder_arr: list = []
# Root of the nested result tree; loop_child() fills it in place and the
# script dumps it as JSON at the end.
json_build: dict = {}
def loop_child(path, object):
    """Recursively crawl a directory listing page into a nested dict.

    Fetches ``HOST + path``, parses the HTML and inspects every anchor matched
    by ``#fallback > table > tr > td.fb-n > a``. An anchor whose ``href`` ends
    with ``/`` is treated as a sub-directory: an empty dict is stored under the
    anchor text and the function recurses into that ``href``. Any other anchor
    is treated as a file and its text is appended to ``object["files"]``.
    Anchors whose text contains ``"Parent Directory"`` are ignored.

    Args:
        path: Path (appended to the module-level ``HOST``) of the listing
            page to fetch.
        object: Dict that is mutated in place to receive the listing. The
            name shadows the builtin but is kept so existing callers that
            pass it by keyword keep working.

    Returns:
        None. Results are written into ``object``; the ``"files"`` key is
        removed again when the directory contains no files.

    Raises:
        Whatever ``urllib.request.urlopen`` raises for an unreachable or
        invalid URL; a ``KeyError`` if a matched anchor has no ``href``.

    Note:
        A sub-directory literally named ``files`` collides with the
        ``"files"`` key used for the file list.
    """
    # Parse while the response is open, then close it right away so a deep
    # crawl does not keep one connection open per visited directory.
    with request.urlopen(HOST + path) as resp:
        soup = BeautifulSoup(resp, features="html.parser")

    links = soup.select("#fallback > table > tr > td.fb-n > a")
    # Insert "files" first so it stays the leading key in the JSON output.
    object["files"] = []

    for link in links:
        name = link.text
        # Skip the parent link before the directory test: if its href ends
        # with "/", recursing into it would walk back up and never terminate.
        if "Parent Directory" in name:
            continue

        href = link["href"]
        if href.endswith("/"):
            object[name] = {}
            # print(json_build)  # just to know if the script is running and what is collected
            loop_child(href, object[name])
        else:
            object["files"].append(name)

    # Drop the placeholder when this directory holds no files.
    if not object["files"]:
        del object["files"]
# Start the crawl at the site root; the tree accumulates in json_build.
loop_child("/", json_build)

# Separator so the final dump stands apart from anything printed earlier.
print("--------------------\n\n\n")

# Final product: the collected tree rendered as indented JSON.
print(json.dumps(json_build, indent=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment