# mistercrunch/parse_gong_call_html_transcript.py

## parse_gong_call_html_transcript.py
# * in Chrome, right-click the transcript container
# * inspect the HTML and copy it into your clipboard
# * paste into a file, say "/tmp/gongcall.html"
# * run `python this_script.py | pbcopy` to get the transcript in your clipboard
# * paste into GPT "here's a transcript from a sales call, please summarize it"
# * ask follow up questions ...

# Make sure to install the following dep ->
# pip install beautifulsoup4

from bs4 import BeautifulSoup

# The file where you paste the HTML copied from the Transcript page.
f = "/tmp/gongcall.html"

# Label used when a monologue block carries no speaker element.
_UNKNOWN_SPEAKER = "Unknown"


def _format_transcript(html):
    """Return the transcript found in *html* as ``speaker: dialogue`` lines.

    Every ``div.monologue-inner`` element yields one line. The speaker is the
    text of its ``span.timestamp__speaker`` child and the dialogue is the
    space-joined text of its ``span.word-wrapper`` descendants. Each line is
    terminated by a newline; an input without monologues yields ``''``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    lines = []
    for monologue in soup.find_all('div', class_='monologue-inner'):
        speaker_tag = monologue.find('span', class_='timestamp__speaker')
        # find() returns None when nothing matches; fall back to a
        # placeholder instead of crashing on `.get_text` and losing the
        # rest of the transcript.
        if speaker_tag is None:
            speaker = _UNKNOWN_SPEAKER
        else:
            speaker = speaker_tag.get_text(strip=True)
        words = monologue.find_all('span', class_='word-wrapper')
        dialogue = ' '.join(word.get_text(strip=True) for word in words)
        lines.append(f'{speaker}: {dialogue}\n')
    return ''.join(lines)


# Read the HTML from the file. The encoding is pinned so decoding does not
# depend on the platform's locale default.
# NOTE(review): assumes the pasted file was saved as UTF-8 -- confirm.
with open(f, 'r', encoding='utf-8') as file:
    html = file.read()

# Extract the speaker and dialogue information
result = _format_transcript(html)

# Print the result
print(result)