afiaka87/transcripts.py

## transcripts.py
"""
# Setup:

`python3 -m pip install bs4`

# Usage:
    ```python3
    # Change the episode_id_param and num_pages
    python3 transcripts.py
    ```
"""
import urllib
import urllib.request

from bs4 import BeautifulSoup

# --- User configuration -----------------------------------------------------
# Forum id of the show: open the show on foreverdreaming and copy the number
# that follows `f=` in the URL (e.g. `viewforum.php?f=104`).
episode_id_param = 104
# Number of listing pages the show's forum has.
num_pages = 7

# --- Site constants (don't change) ------------------------------------------
start_param = 0  # The first page of results starts at offset 0.
increment_by = 25  # Listing pages are offset by 25, 50, 75, etc.
page = "https://transcripts.foreverdreaming.org/viewforum.php?f={}&start={}"

# One listing URL per page. The stop value is derived from `increment_by`
# (instead of a second hard-coded 25) so the two can never drift apart.
page_urls = [
    page.format(episode_id_param, page_start)
    for page_start in range(
        start_param, start_param + num_pages * increment_by, increment_by
    )
]


def _fetch_soup(url):
    """Download *url* and return the response parsed with ``html.parser``.

    The HTTP response is closed as soon as parsing finishes.
    """
    with urllib.request.urlopen(url) as response:
        return BeautifulSoup(response, "html.parser")


def _topic_links(soup):
    """Return the ``href`` of every anchor in *soup* containing ``./viewtopic``.

    Anchors without an ``href`` attribute yield ``None`` from ``.get`` and are
    skipped; testing ``"./viewtopic" in None`` would raise ``TypeError``.
    """
    hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
    return [href for href in hrefs if href and "./viewtopic" in href]


def main():
    """Save the text of every transcript linked from the listing pages.

    For each topic link found on the pages in ``page_urls``, the topic page is
    downloaded and the text of its ``<p>`` elements is written, one element
    per line, to a file in the current directory named after the relative
    link (the same file name the script has always used).
    """
    for listing_url in page_urls:
        for link in _topic_links(_fetch_soup(listing_url)):
            full_url = (
                "https://transcripts.foreverdreaming.org/" + link.replace("./", "")
            )
            episode_soup = _fetch_soup(full_url)
            p_elements = [elem.text for elem in episode_soup.find_all("p")]
            # Context manager guarantees the file is flushed and closed;
            # UTF-8 keeps non-ASCII transcript text writable on any platform.
            with open(link, "w", encoding="utf-8") as transcript_file:
                print("\n".join(p_elements), file=transcript_file)


if __name__ == "__main__":
    main()