Skip to content

Instantly share code, notes, and snippets.

@dimitryzub
Last active September 14, 2022 10:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dimitryzub/fa98a45e009a790758983e49ef70856d to your computer and use it in GitHub Desktop.
Save dimitryzub/fa98a45e009a790758983e49ef70856d to your computer and use it in GitHub Desktop.
Scrape ResearchGate Search - All Questions
# Blog post: https://serpapi.com/blog/web-scraping-all-questions-from-researchgate-search-in-python/
import json
from urllib.parse import quote_plus, urljoin

from parsel import Selector
from playwright.sync_api import sync_playwright
# Trailing slash matters: scraped hrefs can be relative ("post/..."), and
# plain string concatenation used to yield "https://www.researchgate.netpost/...".
_RESEARCHGATE_BASE_URL = "https://www.researchgate.net/"


def _absolute_url(href):
    """Resolve *href* against the ResearchGate origin; return None if it is missing."""
    return urljoin(_RESEARCHGATE_BASE_URL, href) if href else None


def _parse_question(card):
    """Turn one search-result card selector into a plain, JSON-serializable dict."""
    title_css = ".nova-legacy-v-question-item__title .nova-legacy-e-link--theme-bare"
    views_css = ".nova-legacy-v-question-item__metrics-item:nth-child(1) .nova-legacy-e-link--theme-bare"
    answers_css = (
        ".nova-legacy-v-question-item__metrics-item+ "
        ".nova-legacy-v-question-item__metrics-item .nova-legacy-e-link--theme-bare"
    )

    # `.get()` returns None when nothing matches, so guard before calling
    # string methods instead of crashing on an unexpected card layout.
    raw_title = card.css(f"{title_css}::text").get()

    return {
        "title": raw_title.title().strip() if raw_title else None,
        "link": _absolute_url(card.css(f"{title_css}::attr(href)").get()),
        "snippet": card.css(".redraft-text").xpath("normalize-space()").get(),
        "question_type": card.css(".nova-legacy-v-question-item__badge::text").get(),
        "question_date": card.css(
            ".nova-legacy-v-question-item__meta-data-item:nth-child(1) span::text"
        ).get(),
        "views": {
            "views_count": card.css(f"{views_css}::text").get(),
            "views_link": _absolute_url(card.css(f"{views_css}::attr(href)").get()),
        },
        "answer": {
            "answer_count": card.css(f"{answers_css}::text").get(),
            "answers_link": _absolute_url(card.css(f"{answers_css}::attr(href)").get()),
        },
    }


def scrape_researchgate_questions(query: str) -> list:
    """Scrape every page of ResearchGate question search results for *query*.

    Opens a headless Chromium browser through Playwright, walks the paginated
    search results, prints the collected questions as indented JSON, and
    returns them.

    Args:
        query: Raw (not URL-encoded) search text; it is encoded before being
            placed in the request URL.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``snippet``,
        ``question_type``, ``question_date``, ``views`` and ``answer``.
        Fields whose element is missing from a card are ``None``.
    """
    questions = []
    encoded_query = quote_plus(query)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, slow_mo=50)
        try:
            page = browser.new_page(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36")
            page_num = 1

            while True:
                page.goto(f"https://www.researchgate.net/search/question?q={encoded_query}&page={page_num}")
                selector = Selector(text=page.content())

                cards = selector.css(".nova-legacy-c-card__body--spacing-inherit")
                questions.extend(_parse_question(card) for card in cards)

                print(f"page number: {page_num}")

                # A page without result cards (no matches, or an unexpected
                # page layout) has nothing to paginate over; stop rather than
                # requesting pages forever.
                if not cards:
                    break

                # The next-page arrow carries a `rel` attribute once it is
                # greyed out (inactive), which marks the last page.
                if selector.css(".nova-legacy-c-button-group__item:nth-child(9) a::attr(rel)").get():
                    break

                page_num += 1
        finally:
            # Always release the browser process, even if navigation or
            # parsing raised.
            browser.close()

    print(json.dumps(questions, indent=2, ensure_ascii=False))
    return questions
if __name__ == "__main__":
    # Run the demo query only when executed as a script, so that importing
    # this module does not launch a browser and start scraping.
    scrape_researchgate_questions(query="coffee")
@dimitryzub
Copy link
Author

Part of the output:

[
  {
    "title": "Any Recommendations On An Inexpensive Coffee Grinder To Grind Up Bark Samples To Measure Ph?",
    "link": "https://www.researchgate.netpost/Any_recommendations_on_an_inexpensive_coffee_grinder_to_grind_up_bark_samples_to_measure_pH?_sg=tsmZvLsXrFpn6TG77ljxS8pVJhdOMYVlqqYhQl0BszqPCDW1__lnpczwZl8XJiVROJ8_8G8jaerzpX8",
    "snippet": "We are folloiwng protocol by Hansen et al. (2015) Sci. Pharm. They recommend a Rancilio coffee grinder but these are several hundred dollars. Hoping to use something a little less expensive.",
    "question_type": "Question",
    "question_date": "Oct 2017",
    "views": {
      "views_count": "97 Views",
      "views_link": "post/Any_recommendations_on_an_inexpensive_coffee_grinder_to_grind_up_bark_samples_to_measure_pH?_sg=tsmZvLsXrFpn6TG77ljxS8pVJhdOMYVlqqYhQl0BszqPCDW1__lnpczwZl8XJiVROJ8_8G8jaerzpX8"
    },
    "answer": {
      "answer_count": "2 Answers",
      "answers_link": "https://www.researchgate.netpost/Any_recommendations_on_an_inexpensive_coffee_grinder_to_grind_up_bark_samples_to_measure_pH?_sg=tsmZvLsXrFpn6TG77ljxS8pVJhdOMYVlqqYhQl0BszqPCDW1__lnpczwZl8XJiVROJ8_8G8jaerzpX8"
    }
  }, ... other questions
  {
    "title": "Are There Any Ways To Find The Concentration Of A Solution Where Its Chemical Formula And Number Of Moles Are Unknown? ",
    "link": "https://www.researchgate.netpost/Are_there_any_ways_to_find_the_concentration_of_a_solution_where_its_chemical_formula_and_number_of_moles_are_unknown?_sg=6W-hvIYx-FRel_YiWd62lbksTzeWP7GVkZ3tVO6SgZI7F_czhLz_oFCduq9DVhrhvIUy97168wXrn30",
    "snippet": "A comprehensive way to find the concentration of random solutions would enhance benefits related with health, industry, technology and commercial aspects. Although beer lambert law is a solution, there are some cases where Epsilon is unknown (Example: A Coca-Cola drink or a cup of coffee). In this cases, proper a​l​t​",
    "question_type": "Question",
    "question_date": "Jan 2022",
    "views": {
      "views_count": "742 Views",
      "views_link": "post/Are_there_any_ways_to_find_the_concentration_of_a_solution_where_its_chemical_formula_and_number_of_moles_are_unknown?_sg=6W-hvIYx-FRel_YiWd62lbksTzeWP7GVkZ3tVO6SgZI7F_czhLz_oFCduq9DVhrhvIUy97168wXrn30"
    },
    "answer": {
      "answer_count": "4 Answers",
      "answers_link": "https://www.researchgate.netpost/Are_there_any_ways_to_find_the_concentration_of_a_solution_where_its_chemical_formula_and_number_of_moles_are_unknown?_sg=6W-hvIYx-FRel_YiWd62lbksTzeWP7GVkZ3tVO6SgZI7F_czhLz_oFCduq9DVhrhvIUy97168wXrn30"
    }
  }
]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment