Skip to content

Instantly share code, notes, and snippets.

@Lucs1590
Created January 7, 2022 03:25
Show Gist options
  • Save Lucs1590/48993772e6fb16c704d3f97f2c5f1629 to your computer and use it in GitHub Desktop.
Save Lucs1590/48993772e6fb16c704d3f97f2c5f1629 to your computer and use it in GitHub Desktop.
Script to collect articles from Google Scholar, browsing through all the result pages of the searched topic.
import csv
import requests
import time
from lxml import html
# Parallel accumulators filled by the page loop further down: position i of
# each list is later zipped into one CSV row (title, authors).
title_list, author_list = [], []
def get_authors(element):
    """Return the author names of one result byline as a comma-joined string.

    The byline text is expected to look like ``"A Doe, B Roe\\xa0- Venue, 2020"``:
    everything before the first non-breaking space is the author list.
    Reading the names from the text (instead of only from the child link
    elements) keeps authors that have no profile link, which would otherwise
    be dropped whenever a byline mixes linked and unlinked names.

    Args:
        element: Any object exposing ``text_content()`` that returns the
            byline text, e.g. an lxml element for ``div.gs_a``.

    Returns:
        The names joined with ``','`` and stripped of surrounding whitespace;
        an empty string when the byline has no text.
    """
    non_break_space = '\xa0'
    # Authors come first; venue, year and publisher follow the separator.
    byline = element.text_content().split(non_break_space)[0]
    names = (name.strip() for name in byline.split(','))
    # Skip empty fragments so a stray or trailing comma yields no blank name.
    return ','.join(name for name in names if name)
# Walk the result pages of the Scholar query. ``start`` is the offset of the
# first hit on a page, so 0 is the first page; starting at 10 would skip it.
for page in range(0, 720, 10):
    try:
        page_content = requests.get(
            f'https://scholar.google.com/scholar?start={page}&q=(odonto+OR+dentistry)+AND+deep+learning+AND+(semantic+segmentation+AND+instance+segmentation+OR+panoptic+segmentation)+AND+CNN&hl=pt-BR&as_sdt=2007&as_ylo=2019&as_yhi=2022',
            timeout=30,  # never hang forever on a stalled connection
        )
        page_content.raise_for_status()
    except requests.RequestException as error:
        # A failed or rejected request would otherwise be parsed as if it
        # were a result page. Stop paging and keep what was collected so far.
        print(f'Stopping at start={page}: {error}')
        break

    tree = html.fromstring(page_content.content)
    # Read title and byline from the SAME result container so they stay
    # paired. Two independent page-wide queries drift apart as soon as one
    # result lacks a linked title.
    for result in tree.xpath('//div[@class="gs_ri"]'):
        titles = result.xpath('./h3/a')
        bylines = result.xpath('.//div[@class="gs_a"]')
        if not titles or not bylines:
            continue  # entries without a linked title were never collected
        title_list.append(titles[0].text_content())
        author_list.append(get_authors(bylines[0]))

    # Pause between pages to keep the request rate low.
    time.sleep(5)

# newline='' is what the csv module expects for its file objects, and an
# explicit encoding keeps non-ASCII titles from failing on some platforms.
with open('articles.csv', "w", newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(zip(title_list, author_list))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment