zdxerr/histo.py

## histo.py

import json
import numpy as np
from matplotlib import pyplot as plt

# Location of the JSON file holding the opinion records (presumably the
# output of the `crawl_opinions` spider — confirm).
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
DATA_PATH = "/home/christoph/Uni/Computational Argumentation/Assignment 1/demo/first_project/data.json"

# JSON text is UTF-8 by specification; being explicit avoids depending on
# the platform's locale encoding.
with open(DATA_PATH, encoding="utf-8") as json_file:
    data = json.load(json_file)

# Dump the raw records for a quick visual sanity check.
print(data)


# Character length of every argument body (pro and con alike).
argument_length = []

# topic -> {"pro": <number of pro arguments>, "con": <number of con arguments>}
opinions_by_topic = {}
# category -> {"pro": ..., "con": ...}, summed over all opinions in that category
opinions_by_category = {}

for opinion in data:
    pro_arguments = opinion["pro_arguments"]
    con_arguments = opinion["con_arguments"]

    opinions_by_topic[opinion["topic"]] = {
        "pro": len(pro_arguments),
        "con": len(con_arguments),
    }

    # Start every category at zero so the counts can simply be accumulated,
    # instead of probing for the key with try/except KeyError.
    category_counts = opinions_by_category.setdefault(
        opinion["category"], {"pro": 0, "con": 0}
    )
    category_counts["pro"] += len(pro_arguments)
    category_counts["con"] += len(con_arguments)

    argument_length.extend(
        len(argument["body"]) for argument in pro_arguments + con_arguments
    )


def _plot_grouped_bars(counts, title):
    """Draw side-by-side "Pro" and "Con" bars for every key of *counts*.

    Args:
        counts: Mapping of tick label -> {"pro": int, "con": int}.
        title: Title shown above the axes.
    """
    x = np.arange(len(counts))  # the label locations
    width = 0.35  # width of a single bar

    fig, ax = plt.subplots()
    # Shift the two series half a bar width to the left/right so that they
    # sit next to each other around the tick.
    rects_pro = ax.bar(x - width / 2, [c["pro"] for c in counts.values()], width, label='Pro')
    rects_con = ax.bar(x + width / 2, [c["con"] for c in counts.values()], width, label='Con')

    ax.set_title(title)

    ax.set_xticks(x)
    ax.set_xticklabels(list(counts))
    ax.legend()

    # Print the count above each bar.
    ax.bar_label(rects_pro, padding=3)
    ax.bar_label(rects_con, padding=3)

    fig.tight_layout()


# Which figure to show: "histogram", "by_topic" or "by_category".
PLOT_MODE = "histogram"

if PLOT_MODE == "histogram":
    # Distribution of argument body lengths (in characters).
    plt.hist(np.array(argument_length))
elif PLOT_MODE == "by_topic":
    _plot_grouped_bars(opinions_by_topic, "Pro and Cons by Topic")
elif PLOT_MODE == "by_category":
    _plot_grouped_bars(opinions_by_category, "Pro and Cons by Category")
else:
    raise ValueError(f"unknown PLOT_MODE: {PLOT_MODE!r}")

plt.show()

## spider.py
import scrapy
import w3lib

class CrawlQuotesSpider(scrapy.Spider):
    """Scrape a fixed list of debate.org opinion pages.

    For every page one item is yielded with the keys ``topic``,
    ``category``, ``pro_arguments`` and ``con_arguments``; each argument is
    a dict holding the tag-stripped ``title`` and ``body`` text.
    """

    name = "crawl_opinions"

    def start_requests(self):
        """Yield one request per predefined opinion URL."""
        # Predefined pages to crawl.
        urls = [
            'https://www.debate.org/opinions/should-young-people-take-a-gap-year',
            'https://www.debate.org/opinions/social-media-is-mental-slavery',
            'https://www.debate.org/opinions/should-kids-have-as-much-sugar-as-they-want',
            'https://www.debate.org/opinions/is-water-wet',
            'https://www.debate.org/opinions/should-homework-be-banned',
        ]

        for url in urls:
            request = scrapy.Request(url=url, callback=self.parse_opinion)
            # The URL slug travels along in ``meta``; ``parse_opinion`` does
            # not read it at the moment.
            request.meta['opinion_name'] = url.split('/')[-1]
            yield request

    @staticmethod
    def _strip_tags(markup):
        """Return *markup* with all HTML tags removed."""
        # Import the submodule explicitly: a bare ``import w3lib`` does not
        # by itself guarantee that ``w3lib.html`` has been loaded.
        from w3lib.html import remove_tags

        return remove_tags(markup)

    def _extract_arguments(self, response, list_selector):
        """Collect the arguments found under *list_selector*.

        Args:
            response: Response of an opinion page.
            list_selector: CSS selector matching one element per argument.

        Returns:
            A list of ``{"title": ..., "body": ...}`` dicts with HTML tags
            removed.
        """
        return [
            {
                # ``default=''`` keeps an entry without a heading or a
                # paragraph from passing ``None`` to the tag stripper.
                "title": self._strip_tags(entry.css('h2').get(default='')),
                "body": self._strip_tags(entry.css('p').get(default='')),
            }
            for entry in response.css(list_selector)
        ]

    def parse_opinion(self, response):
        """Yield one item describing the opinion page in *response*.

        Raises:
            IndexError: If the page has no ``.q-title`` element or fewer
                than three ``#breadcrumb a`` links.
        """
        pro_arguments = self._extract_arguments(response, '#yes-arguments li.hasData')
        con_arguments = self._extract_arguments(response, '#no-arguments li.hasData')

        yield {
            "topic": self._strip_tags(response.css('.q-title')[0].get()),
            # The third breadcrumb link is used as the category.
            "category": self._strip_tags(response.css('#breadcrumb a')[2].get()),
            "pro_arguments": pro_arguments,
            "con_arguments": con_arguments,
        }

	import json
	import numpy as np
	from matplotlib import pyplot as plt

	# Location of the JSON file holding the opinion records (presumably the
	# output of the `crawl_opinions` spider — confirm).
	# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
	DATA_PATH = "/home/christoph/Uni/Computational Argumentation/Assignment 1/demo/first_project/data.json"

	# JSON text is UTF-8 by specification; being explicit avoids depending on
	# the platform's locale encoding.
	with open(DATA_PATH, encoding="utf-8") as json_file:
	    data = json.load(json_file)

	# Dump the raw records for a quick visual sanity check.
	print(data)


	# Character length of every argument body (pro and con alike).
	argument_length = []

	# topic -> {"pro": <number of pro arguments>, "con": <number of con arguments>}
	opinions_by_topic = {}
	# category -> {"pro": ..., "con": ...}, summed over all opinions in that category
	opinions_by_category = {}

	for opinion in data:
	    pro_arguments = opinion["pro_arguments"]
	    con_arguments = opinion["con_arguments"]

	    opinions_by_topic[opinion["topic"]] = {
	        "pro": len(pro_arguments),
	        "con": len(con_arguments),
	    }

	    # Start every category at zero so the counts can simply be accumulated,
	    # instead of probing for the key with try/except KeyError.
	    category_counts = opinions_by_category.setdefault(
	        opinion["category"], {"pro": 0, "con": 0}
	    )
	    category_counts["pro"] += len(pro_arguments)
	    category_counts["con"] += len(con_arguments)

	    argument_length.extend(
	        len(argument["body"]) for argument in pro_arguments + con_arguments
	    )


	def _plot_grouped_bars(counts, title):
	    """Draw side-by-side "Pro" and "Con" bars for every key of *counts*.

	    Args:
	        counts: Mapping of tick label -> {"pro": int, "con": int}.
	        title: Title shown above the axes.
	    """
	    x = np.arange(len(counts))  # the label locations
	    width = 0.35  # width of a single bar

	    fig, ax = plt.subplots()
	    # Shift the two series half a bar width to the left/right so that they
	    # sit next to each other around the tick.
	    rects_pro = ax.bar(x - width / 2, [c["pro"] for c in counts.values()], width, label='Pro')
	    rects_con = ax.bar(x + width / 2, [c["con"] for c in counts.values()], width, label='Con')

	    ax.set_title(title)

	    ax.set_xticks(x)
	    ax.set_xticklabels(list(counts))
	    ax.legend()

	    # Print the count above each bar.
	    ax.bar_label(rects_pro, padding=3)
	    ax.bar_label(rects_con, padding=3)

	    fig.tight_layout()


	# Which figure to show: "histogram", "by_topic" or "by_category".
	PLOT_MODE = "histogram"

	if PLOT_MODE == "histogram":
	    # Distribution of argument body lengths (in characters).
	    plt.hist(np.array(argument_length))
	elif PLOT_MODE == "by_topic":
	    _plot_grouped_bars(opinions_by_topic, "Pro and Cons by Topic")
	elif PLOT_MODE == "by_category":
	    _plot_grouped_bars(opinions_by_category, "Pro and Cons by Category")
	else:
	    raise ValueError(f"unknown PLOT_MODE: {PLOT_MODE!r}")

	plt.show()
	import scrapy
	import w3lib

	class CrawlQuotesSpider(scrapy.Spider):
	    """Scrape a fixed list of debate.org opinion pages.

	    For every page one item is yielded with the keys ``topic``,
	    ``category``, ``pro_arguments`` and ``con_arguments``; each argument is
	    a dict holding the tag-stripped ``title`` and ``body`` text.
	    """

	    name = "crawl_opinions"

	    def start_requests(self):
	        """Yield one request per predefined opinion URL."""
	        # Predefined pages to crawl.
	        urls = [
	            'https://www.debate.org/opinions/should-young-people-take-a-gap-year',
	            'https://www.debate.org/opinions/social-media-is-mental-slavery',
	            'https://www.debate.org/opinions/should-kids-have-as-much-sugar-as-they-want',
	            'https://www.debate.org/opinions/is-water-wet',
	            'https://www.debate.org/opinions/should-homework-be-banned',
	        ]

	        for url in urls:
	            request = scrapy.Request(url=url, callback=self.parse_opinion)
	            # The URL slug travels along in ``meta``; ``parse_opinion`` does
	            # not read it at the moment.
	            request.meta['opinion_name'] = url.split('/')[-1]
	            yield request

	    @staticmethod
	    def _strip_tags(markup):
	        """Return *markup* with all HTML tags removed."""
	        # Import the submodule explicitly: a bare ``import w3lib`` does not
	        # by itself guarantee that ``w3lib.html`` has been loaded.
	        from w3lib.html import remove_tags

	        return remove_tags(markup)

	    def _extract_arguments(self, response, list_selector):
	        """Collect the arguments found under *list_selector*.

	        Args:
	            response: Response of an opinion page.
	            list_selector: CSS selector matching one element per argument.

	        Returns:
	            A list of ``{"title": ..., "body": ...}`` dicts with HTML tags
	            removed.
	        """
	        return [
	            {
	                # ``default=''`` keeps an entry without a heading or a
	                # paragraph from passing ``None`` to the tag stripper.
	                "title": self._strip_tags(entry.css('h2').get(default='')),
	                "body": self._strip_tags(entry.css('p').get(default='')),
	            }
	            for entry in response.css(list_selector)
	        ]

	    def parse_opinion(self, response):
	        """Yield one item describing the opinion page in *response*.

	        Raises:
	            IndexError: If the page has no ``.q-title`` element or fewer
	                than three ``#breadcrumb a`` links.
	        """
	        pro_arguments = self._extract_arguments(response, '#yes-arguments li.hasData')
	        con_arguments = self._extract_arguments(response, '#no-arguments li.hasData')

	        yield {
	            "topic": self._strip_tags(response.css('.q-title')[0].get()),
	            # The third breadcrumb link is used as the category.
	            "category": self._strip_tags(response.css('#breadcrumb a')[2].get()),
	            "pro_arguments": pro_arguments,
	            "con_arguments": con_arguments,
	        }