Scraping results from the Quiz And Survey Master WordPress plugin using Python 3.
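Requires Python 3 with the requests and beautifulsoup4 packages (pip install requests beautifulsoup4); python-dateutil is only needed if you uncomment the optional datetime parsing below.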
import os, csv, requests, datetime, urllib.parse, re
from bs4 import BeautifulSoup
# from dateutil.parser import parse  # if you want to parse datetime

# This script allows you to grab results from your WordPress website if you're
# using the "Quiz And Survey Master" plugin.
wp_host = 'https://your-wordpress-website.com'
wp_login = f"{wp_host}/login"
wp_admin = f"{wp_host}/wp-admin/"
wp_quiz = f"{wp_host}/wp-admin/admin.php?page=mlw_quiz_results&qmn_order_by=time_taken_real"

username = 'YOUR_USER_NAME'
password = 'YOUR_PASSWORD'
quiz_params = {
    'title': 'NAME OF THE QUIZ',
    # Complete this list with the fields you want (one entry per CSV column):
    'fields': ['Quiz Name', 'Name', 'Email', 'Phone', 'Answers']
}
def save_csv(filename, data):
    write_mode = 'a' if os.path.exists(filename) else 'w'  # create the file on first write
    # newline='' prevents blank lines on Windows; utf-8 keeps non-ASCII answers intact
    with open(filename, mode=write_mode, newline='', encoding='utf-8') as csv_file:
        data_writer = csv.writer(csv_file, delimiter=';', quotechar='"')
        if write_mode == 'w':
            data_writer.writerow(quiz_params['fields'])  # header row, written once
        data_writer.writerow(data)
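
# Usage sketch (hypothetical values) - appends one row per quiz result,
# writing the header only when the file is first created:
# save_csv('results.csv', ['My Quiz', 'Jane Doe', 'jane@example.com', '555-0100', 'Q1;A1'])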
def parse_url(session, url, title, result_file, datetime_result):
    # print('parsing url: ' + url)
    resp = session.get(url)
    if resp.status_code == 200:
        data = [title]
        soup = BeautifulSoup(resp.content, features="html.parser")
        # Overview section
        overview = soup.find_all(class_='overview-main-wrapper')[0]
        div = overview.find_all(id='submitdiv')[0]
        spans = div.find_all(class_='result-candidate-span')
        for span in spans:
            data.append(span.find('span').text.strip())
        # Comments section (hidden)
        comments = soup.find_all(class_='comment-inner-wrap')
        if comments:
            div = comments[0].find_all(id='submitdiv')[0]
            comment_content = div.find_all(class_='inside')[0]
            # print(comment_content.text.strip())  # comment
            data.append(comment_content.text.strip())
        # Responses section
        responses_wrap = soup.find_all(class_='response-inner-wrap')[0]
        responses = responses_wrap.find_all(class_='qmn_question_answer')
        survey = []
        for response in responses:
            r = response.find_all(class_='mlw_qmn_question')[0]
            question = r.find_all('b')[0]
            survey.append(question.text.strip())
            answer = r.find_all('span')[0]
            survey.append(answer.text.strip())
        # print(';'.join(survey))
        data.append(';'.join(survey))
        data.append(datetime_result)
        save_csv(result_file, data)
    else:
        print('Error: status code:', resp.status_code)
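
# Each row saved by parse_url looks like:
#   [quiz title, overview fields..., optional comment, 'question;answer;...', datetime]
# Keep quiz_params['fields'] in sync with this order so the CSV header matches.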
def parse_page(session, soup, result_file):
    tbody_list = soup.find_all('tbody', id='the-list')[0]  # <tbody> of the results table
    trs = tbody_list.find_all('tr')
    print(f"Nbr of rows in the page: {len(trs)}")
    for tr in trs:
        rows = tr.find_all('td')  # rows[0] is the checkbox column
        # print('Quiz Name: ', rows[1].text.strip())
        # print('Name: ', rows[4].text.strip())
        # print('Email: ', rows[6].text.strip())
        td_quiz_name = rows[1]  # Quiz Name
        datetime_result = datetime.datetime.now()  # fallback if no timestamp is found
        user = rows[8]  # User
        if user.find('abbr'):
            datetime_result = user.find('abbr')['title']
        datetime_taken = rows[9]  # DateTime taken
        if datetime_taken.find('abbr'):
            datetime_result = datetime_taken.find('abbr')['title']
        title = td_quiz_name.find_all('span')[0].text
        show_link = td_quiz_name.find_all('a', href=True)[0]
        url = urllib.parse.urljoin(wp_admin, show_link['href'])
        # print(parse(datetime_result))
        # if title == quiz_params['title']:
        parse_url(session, url, title, result_file, datetime_result)
def query_parse_page(session, url_page, num_page):
    result_file = 'results_' + datetime.datetime.now().strftime('%d_%m_%Y-%H_%M_%S') + '_page' + str(num_page + 1) + '.csv'
    resp = session.get(url_page)
    if resp.status_code == 200:
        soup = BeautifulSoup(resp.content, features="html.parser")
        parse_page(session, soup, result_file)
    else:
        print('Cannot find page: ', url_page)
with requests.Session() as s:
    s.post(wp_login, data={'log': username, 'pwd': password})
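    # NOTE: wp_login above points to '/login', which assumes a custom login page;
    # the stock WordPress endpoint is wp-login.php (it accepts the same
    # 'log'/'pwd' form fields). An optional sanity check, assuming a failed
    # login redirects /wp-admin/ requests back to the login form:
    # if 'wp-login' in s.get(wp_admin).url:
    #     raise SystemExit('Login failed: check credentials and wp_login URL')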
    # Encode the quiz title to URL format:
    quiz_title_query = urllib.parse.urlencode({'qsm_search_phrase': quiz_params['title']})
    # Append the quiz title to the URL:
    wp_quiz = f"{wp_quiz}&{quiz_title_query}"
    resp = s.get(wp_quiz)
    soup = BeautifulSoup(resp.content, features="html.parser")
    title = soup.find_all('h2')[0]
    assert 'Quiz Results' in title.text.strip()
    # Loop through pages:
    # 'div.tablenav-pages' > 'span.displaying-num' => '209 results' (needs to be stripped)
    num_results = soup.find_all('span', class_='displaying-num')[0].text.strip()
    print("Nbr of results: ", num_results)
    # 'div.tablenav-pages' > 'span.pagination-links' > 'span.paging-input' => '1 of 6'
    paging_num = soup.find_all('span', class_='paging-input')
    # We first parse the current (first) page
    result_file = 'results_' + datetime.datetime.now().strftime('%d_%m_%Y-%H_%M_%S') + '.csv'
    parse_page(s, soup, result_file)
    # We then go through the pagination
    if len(paging_num) > 0:  # the page may have no pagination
        paging_num = paging_num[0].text.strip()
        # Retrieve the number of available pages, e.g. parse '1 of 6'
        regx_paging_num = re.match(r'(\d+) of (\d+)', paging_num)
        nbr_pages = -1
        if regx_paging_num:
            nbr_pages = regx_paging_num.group(2)  # get 6 out of '1 of 6'
        print('Nbr of pages: ', nbr_pages)
        # Each page is selected by appending &qsm_results_page=N (0-based), e.g.
        # f"{wp_quiz}&qsm_results_page=0" is the URL of the first page.
        for num_page in range(0, int(nbr_pages) - 1):
            url_page = f"{wp_quiz}&qsm_results_page={num_page}"
            query_parse_page(s, url_page, num_page)
    else:
        print('Cannot find pagination, paging num: ', paging_num)
    print('done.')
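
# Output: one 'results_<timestamp>.csv' for the first page, plus one
# 'results_<timestamp>_pageN.csv' per additional page, written to the current directory.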