Skip to content

Instantly share code, notes, and snippets.

@zdxerr
Last active May 2, 2021 19:13
Show Gist options
  • Save zdxerr/68e8bef99712303405bd24a70b9acd4b to your computer and use it in GitHub Desktop.
Save zdxerr/68e8bef99712303405bd24a70b9acd4b to your computer and use it in GitHub Desktop.
import json
import numpy as np
from matplotlib import pyplot as plt
# Location of the JSON export to analyse.
DATA_PATH = "/home/christoph/Uni/Computational Argumentation/Assignment 1/demo/first_project/data.json"

# Which figure main() draws: "histogram" (argument body lengths),
# "topic" (pro/con counts per topic) or "category" (pro/con counts per category).
PLOT_KIND = "histogram"


def load_opinions(path=DATA_PATH):
    """Read and return the decoded JSON document stored at *path*."""
    # JSON is UTF-8 by specification; don't depend on the locale's default encoding.
    with open(path, encoding="utf-8") as json_file:
        return json.load(json_file)


def summarize_opinions(data):
    """Aggregate argument statistics over an iterable of opinion records.

    Each record must provide the keys "topic", "category", "pro_arguments"
    and "con_arguments"; every argument must provide a "body".

    Returns a tuple ``(argument_length, opinions_by_topic, opinions_by_category)``:

    * ``argument_length`` -- ``len(body)`` of every argument, pro arguments
      before con arguments within each record, records in input order.
    * ``opinions_by_topic`` -- ``{topic: {"pro": n, "con": m}}``; when a topic
      occurs more than once the last record wins.
    * ``opinions_by_category`` -- ``{category: {"pro": n, "con": m}}`` summed
      over all records sharing that category.
    """
    argument_length = []
    opinions_by_topic = {}
    opinions_by_category = {}
    for opinion in data:
        pro_arguments = opinion["pro_arguments"]
        con_arguments = opinion["con_arguments"]

        opinions_by_topic[opinion["topic"]] = {
            "pro": len(pro_arguments),
            "con": len(con_arguments),
        }

        # Start every category at zero so the totals can simply be added up.
        category_totals = opinions_by_category.setdefault(
            opinion["category"], {"pro": 0, "con": 0}
        )
        category_totals["pro"] += len(pro_arguments)
        category_totals["con"] += len(con_arguments)

        argument_length.extend(
            len(argument["body"]) for argument in pro_arguments + con_arguments
        )
    return argument_length, opinions_by_topic, opinions_by_category


def _plot_pro_con_bars(counts, title):
    """Draw a grouped Pro/Con bar chart for a ``{label: {"pro", "con"}}`` mapping."""
    x = np.arange(len(counts))  # the label locations
    width = 0.35
    fig, ax = plt.subplots()
    rects1 = ax.bar(x - width / 2, [d["pro"] for d in counts.values()], width, label='Pro')
    rects2 = ax.bar(x + width / 2, [d["con"] for d in counts.values()], width, label='Con')
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(list(counts))
    ax.legend()
    ax.bar_label(rects1, padding=3)
    ax.bar_label(rects2, padding=3)
    # The figure only exists for the bar charts, so the layout is tightened here.
    fig.tight_layout()


def main():
    """Load the data, print it, and show the figure selected by PLOT_KIND."""
    data = load_opinions()
    print(data)
    argument_length, opinions_by_topic, opinions_by_category = summarize_opinions(data)

    if PLOT_KIND == "histogram":
        plt.hist(np.array(argument_length))
    elif PLOT_KIND == "topic":
        _plot_pro_con_bars(opinions_by_topic, "Pro and Cons by Topic")
    elif PLOT_KIND == "category":
        _plot_pro_con_bars(opinions_by_category, "Pro and Cons by Category")
    else:
        raise ValueError(f"unknown PLOT_KIND: {PLOT_KIND!r}")
    plt.show()


if __name__ == "__main__":
    main()
import scrapy
import w3lib
class CrawlQuotesSpider(scrapy.Spider):
    """Crawl a fixed list of debate.org opinion pages.

    For every page one item is yielded with the keys "topic", "category",
    "pro_arguments" and "con_arguments"; each argument is a dict with the
    tag-stripped "title" and "body" text.
    """

    name = "crawl_opinions"

    # Predefined pages to crawl.
    start_urls = [
        'https://www.debate.org/opinions/should-young-people-take-a-gap-year',
        'https://www.debate.org/opinions/social-media-is-mental-slavery',
        'https://www.debate.org/opinions/should-kids-have-as-much-sugar-as-they-want',
        'https://www.debate.org/opinions/is-water-wet',
        'https://www.debate.org/opinions/should-homework-be-banned',
    ]

    def start_requests(self):
        """Yield one request per configured page, handled by parse_opinion."""
        for url in self.start_urls:
            request = scrapy.Request(url=url, callback=self.parse_opinion)
            # The URL slug travels with the request; parse_opinion does not
            # read it at the moment.
            request.meta['opinion_name'] = url.split('/')[-1]
            yield request

    @staticmethod
    def _plain_text(selection):
        """Return the first match of *selection* without HTML tags ("" if no match)."""
        # get() would return None for an empty selection, which remove_tags
        # rejects; default to an empty string instead.
        return w3lib.html.remove_tags(selection.get(default=""))

    def _extract_arguments(self, response, selector):
        """Return a title/body dict for every list entry matched by *selector*."""
        return [
            {
                "title": self._plain_text(entry.css('h2')),
                "body": self._plain_text(entry.css('p')),
            }
            for entry in response.css(selector)
        ]

    def parse_opinion(self, response):
        """Yield the opinion item scraped from *response*.

        Pages without a topic title or without a third breadcrumb link are
        skipped with a warning instead of failing with a bare IndexError.
        """
        titles = response.css('.q-title')
        breadcrumbs = response.css('#breadcrumb a')
        if not titles or len(breadcrumbs) < 3:
            self.logger.warning(
                "Skipping %s: topic title or category breadcrumb not found",
                response.url,
            )
            return

        yield {
            "topic": w3lib.html.remove_tags(titles[0].get()),
            # The category is read from the third breadcrumb link.
            "category": w3lib.html.remove_tags(breadcrumbs[2].get()),
            "pro_arguments": self._extract_arguments(response, '#yes-arguments li.hasData'),
            "con_arguments": self._extract_arguments(response, '#no-arguments li.hasData'),
        }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment