#-----------------------------------------------------------------------------------------------------#
# This script reads an HTML file containing a list of repositories, extracts each repository's name,  #
# description, and last-updated date with BeautifulSoup, and writes the results to a new HTML file as #
# a table. The table has columns for Repository, Description, and Last Updated, with one row per      #
# repository from the original list.                                                                   #
#-----------------------------------------------------------------------------------------------------#
from bs4 import BeautifulSoup

# Read the source HTML containing the repository list.
with open('file1.html', 'r') as f:
    html = f.read()

soup = BeautifulSoup(html, 'html.parser')
repo_list = soup.find('ul')

# Write the extracted data to a new file as an HTML table.
with open('table.html', 'w') as f:
    f.write('<table>\n')
    f.write('  <thead>\n')
    f.write('    <tr>\n')
    f.write('      <th>Repository</th>\n')
    f.write('      <th>Description</th>\n')
    f.write('      <th>Last Updated</th>\n')
    f.write('    </tr>\n')
    f.write('  </thead>\n')
    f.write('  <tbody>\n')

    # One table row per <li> entry in the repository list.
    for repo in repo_list.find_all('li'):
        link = repo.find('a')
        repo_name = link.text
        repo_url = link['href']

        # The description is the text after " - "; fall back to an empty string if it is absent.
        parts = repo.text.split(' - ')
        repo_desc = parts[1] if len(parts) > 1 else ''

        # The last-updated date is expected in trailing parentheses, e.g. "(2023-12-10)".
        if '(' in repo.text:
            repo_date = repo.text.split('(')[-1].strip(')\n')
        else:
            repo_date = ''

        f.write('    <tr>\n')
        f.write(f'      <td><a href="{repo_url}">{repo_name}</a></td>\n')
        f.write(f'      <td>{repo_desc}</td>\n')
        f.write(f'      <td>{repo_date}</td>\n')
        f.write('    </tr>\n')

    f.write('  </tbody>\n')
    f.write('</table>\n')
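
# -----------------------------------------------------------------------------------------------------
# Example input (an illustrative sketch, not part of the original gist): the parser above assumes
# file1.html holds a <ul> whose <li> items contain a link, a description after " - ", and a
# last-updated date in parentheses. The block below writes such a minimal sample to sample_input.html
# (a hypothetical file name); rename it to file1.html to try the converter end to end. The URLs,
# repository names, and dates here are purely illustrative.
# -----------------------------------------------------------------------------------------------------
sample_input = """<ul>
  <li><a href="https://github.com/example/repo-one">repo-one</a> - A demo repository (2023-12-01)</li>
  <li><a href="https://github.com/example/repo-two">repo-two</a> - Another demo repository (2023-11-20)</li>
</ul>
"""

with open('sample_input.html', 'w') as f:
    f.write(sample_input)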