Lucs1590/get_scholar_articles.py

## get_scholar_articles.py
import csv
import requests
import time
from lxml import html

# Module-level accumulators filled by the scraping loop below and later
# paired by position (zip) into the rows of the CSV output.
title_list = []   # one title string per scraped result
author_list = []  # one author string per scraped result


def get_authors(element):
    """Return the author string for one result byline element.

    Args:
        element: An element (the script passes lxml ``div.gs_a`` nodes) that
            yields its child elements when iterated and exposes
            ``text_content()``.

    Returns:
        The text of every child element joined with ``","``. When that
        joined string is empty, the part of the element's own text that
        precedes the first non-breaking space (the whole text if there is
        none).
    """
    # NOTE(review): the children are presumably the linked author profiles,
    # so a byline where only some authors are linked would yield just those
    # authors -- confirm against the live markup.
    linked_authors = ','.join(child.text_content() for child in element)
    if linked_authors:
        return linked_authors

    # Fallback: take the plain byline text up to the first non-breaking space.
    non_breaking_space = '\xa0'
    byline_text = element.text_content()
    return byline_text.partition(non_breaking_space)[0]


# Walk the result pages of one fixed Google Scholar query, advancing the
# `start` offset by 10 per request, and collect one title and one author
# string per result.
# NOTE(review): the range begins at 10, so the page with start=0 is never
# requested -- confirm that this is intended.
for page in range(10, 720, 10):
    try:
        page_content = requests.get(
            f'https://scholar.google.com/scholar?start={page}&q=(odonto+OR+dentistry)+AND+deep+learning+AND+(semantic+segmentation+AND+instance+segmentation+OR+panoptic+segmentation)+AND+CNN&hl=pt-BR&as_sdt=2007&as_ylo=2019&as_yhi=2022',
            # Without a timeout a stalled connection would block forever.
            timeout=30,
        )
    except requests.RequestException as error:
        # Stop paging but keep what was collected so far, so the rows
        # gathered before the failure still reach the CSV.
        print(f'Request for start={page} failed: {error}')
        break
    tree = html.fromstring(page_content.content)

    # Read the title and the byline from the SAME result container. Two
    # independent document-wide queries drift out of step as soon as one
    # result has no title link, pairing later titles with the wrong authors.
    for result in tree.xpath('//div[@class="gs_ri"]'):
        title_links = result.xpath('./h3/a')
        bylines = result.xpath('.//div[@class="gs_a"]')
        if not title_links or not bylines:
            # Skip incomplete results rather than misalign the two lists.
            continue
        title_list.append(title_links[0].text_content())
        author_list.append(get_authors(bylines[0]))

    # Pause between requests to avoid hammering the server.
    time.sleep(5)


# Pair each title with its author string by position; zip() stops at the
# shorter list, so unmatched trailing entries are dropped.
rows = zip(title_list, author_list)

# newline='' hands line-ending control to the csv module (otherwise every row
# is followed by a blank line on Windows); an explicit UTF-8 encoding keeps
# non-ASCII titles and names from failing under a narrower locale codec.
with open('articles.csv', "w", newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(rows)
	import csv
	import requests
	import time
	from lxml import html

	# NOTE(review): from here on the file repeats the script that precedes it,
	# prefixed with a tab and with nested indentation flattened -- presumably
	# a paste artifact; confirm and consider removing this copy.
	# Accumulators filled by the scraping loop and zipped into CSV rows.
	title_list = []
	author_list = []


	def get_authors(element):
		"""Return the author string for one result byline element.

		Args:
			element: An element that yields its child elements when
				iterated and exposes ``text_content()``.

		Returns:
			The text of every child element joined with ``","``. When that
			joined string is empty, the part of the element's own text that
			precedes the first non-breaking space (the whole text if there
			is none).
		"""
		# NOTE(review): the children are presumably the linked author
		# profiles, so a partially linked byline would yield only those
		# authors -- confirm against the live markup.
		linked_authors = ','.join(child.text_content() for child in element)
		if linked_authors:
			return linked_authors

		# Fallback: plain byline text up to the first non-breaking space.
		non_breaking_space = '\xa0'
		byline_text = element.text_content()
		return byline_text.partition(non_breaking_space)[0]


	# Walk the result pages of one fixed Google Scholar query, advancing the
	# `start` offset by 10 per request, and collect one title and one author
	# string per result.
	# NOTE(review): the range begins at 10, so the page with start=0 is never
	# requested -- confirm that this is intended.
	for page in range(10, 720, 10):
		try:
			page_content = requests.get(
				f'https://scholar.google.com/scholar?start={page}&q=(odonto+OR+dentistry)+AND+deep+learning+AND+(semantic+segmentation+AND+instance+segmentation+OR+panoptic+segmentation)+AND+CNN&hl=pt-BR&as_sdt=2007&as_ylo=2019&as_yhi=2022',
				# Without a timeout a stalled connection would block forever.
				timeout=30,
			)
		except requests.RequestException as error:
			# Stop paging but keep what was collected so far.
			print(f'Request for start={page} failed: {error}')
			break
		tree = html.fromstring(page_content.content)

		# Read the title and the byline from the SAME result container so
		# the two lists cannot drift out of step when a result has no
		# title link.
		for result in tree.xpath('//div[@class="gs_ri"]'):
			title_links = result.xpath('./h3/a')
			bylines = result.xpath('.//div[@class="gs_a"]')
			if not title_links or not bylines:
				# Skip incomplete results rather than misalign the lists.
				continue
			title_list.append(title_links[0].text_content())
			author_list.append(get_authors(bylines[0]))

		# Pause between requests to avoid hammering the server.
		time.sleep(5)


	# Pair each title with its author string by position; zip() stops at the
	# shorter list, so unmatched trailing entries are dropped.
	rows = zip(title_list, author_list)

	# newline='' hands line-ending control to the csv module (otherwise rows
	# are separated by blank lines on Windows); explicit UTF-8 keeps
	# non-ASCII text from failing under a narrower locale codec.
	with open('articles.csv', "w", newline='', encoding='utf-8') as f:
		writer = csv.writer(f)
		writer.writerows(rows)