Skip to content

Instantly share code, notes, and snippets.

@bemitc
Created February 18, 2022 20:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bemitc/39989f589b44ae73f1143d2e95afac93 to your computer and use it in GitHub Desktop.
Save bemitc/39989f589b44ae73f1143d2e95afac93 to your computer and use it in GitHub Desktop.
# Importing the libraries
from bs4 import BeautifulSoup
import requests
import sys
import re
import argparse
import os
import csv
# Function to generate cards using the given html page
def generateCards(htmlPage, maxPages=4, outputPath='export.csv', timeout=30):
    """Scrape question/answer cards from a paginated page and export them as CSV.

    The page at ``htmlPage`` is requested repeatedly with ``page=1``,
    ``page=2``, ... because (per the original author's note) Brainscape
    shows at most 30 questions per page.  On every page the
    ``div.market-content`` container is searched for elements with the
    classes ``card-question-text`` and ``card-answer-text``.  When the
    number of questions equals the number of answers, one CSV row per
    card (question, answer) is written to ``outputPath``.

    Args:
        htmlPage: Base URL of the card listing.
        maxPages: Number of pages to request.  Defaults to 4, which is
            what the original ``while i < 5`` loop fetched.
        outputPath: Destination CSV file; it is overwritten.
        timeout: Seconds passed to ``requests.get`` as its timeout so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        None.  Problems that prevent an export (no cards found, or a
        question/answer count mismatch) are reported on stderr and no
        file is written.
    """
    questionsData = []
    answersData = []

    for pageNumber in range(1, maxPages + 1):
        # Let requests build the query string so a URL that already has
        # parameters gets '&page=N' instead of a second '?'.
        response = requests.get(
            htmlPage, params={'page': pageNumber}, timeout=timeout
        )
        soup = BeautifulSoup(response.text, "lxml")

        # find() returns None when the container is absent; treat that as
        # "no more pages" instead of crashing on None.find_all().
        cardTable = soup.find('div', {'class': 'market-content'})
        if cardTable is None:
            break

        questionsData += cardTable.find_all(attrs={'class': 'card-question-text'})
        answersData += cardTable.find_all(attrs={'class': 'card-answer-text'})

    if not questionsData and not answersData:
        print('No cards found at ' + str(htmlPage) + '; nothing exported.',
              file=sys.stderr)
        return

    # Rows are paired by position, so unequal counts would produce
    # misaligned cards; report it instead of silently doing nothing.
    if len(questionsData) != len(answersData):
        print('Found ' + str(len(questionsData)) + ' questions but '
              + str(len(answersData)) + ' answers; nothing exported.',
              file=sys.stderr)
        return

    # Explicit UTF-8 so non-ASCII card text does not depend on the
    # platform's default encoding.
    with open(outputPath, 'w', newline='', encoding='utf-8') as csvfile:
        ankiExport = csv.writer(csvfile)
        for question, answer in zip(questionsData, answersData):
            ankiExport.writerow([
                question.get_text().strip(),
                # '>' in the answer text is turned into a line break
                # (presumably a list separator in the scraped markup;
                # TODO confirm against a real page).
                answer.get_text().strip().replace('>', '\n'),
            ])
if __name__ == '__main__':
    # Run the export only when executed as a script, so importing this
    # module does not trigger network requests.  The page URL is taken
    # from the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' <url>')
    generateCards(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment