Created
January 11, 2017 20:55
-
-
Save christopherkullenberg/e19333272501039fbf40a3334ba14900 to your computer and use it in GitHub Desktop.
Search Alt (Right) Media
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"from bs4 import BeautifulSoup\n", | |
"import io" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def searchaltmedia(searchstring):\n", | |
" urls = ['http://nyheteridag.se/?s=',\n", | |
" 'http://avpixlat.info/?s=',\n", | |
" 'http://www.friatider.se/search/node/',\n", | |
" 'http://www.klarvaken.se/?s=',\n", | |
" 'https://www.nordfront.se/?s=',\n", | |
" 'https://www.nyanserat.nu/?s=',\n", | |
" 'http://samtiden.nu/?s=']\n", | |
" \n", | |
" results = []\n", | |
"\n", | |
" for u in urls:\n", | |
" r = requests.get(u + searchstring)\n", | |
" file_like_obj = io.StringIO(r.text) \n", | |
" lines = file_like_obj.read()\n", | |
" soup = BeautifulSoup(lines, \"lxml\")\n", | |
" \n", | |
" if u == 'http://avpixlat.info/?s=':\n", | |
" posturls = soup.findAll(\"div\", { \"class\" : \"post-headline\" })\n", | |
" for p in posturls:\n", | |
" #print(p.find('a').attrs['href'])\n", | |
" results.append(p.find('a').attrs['href'])\n", | |
" \n", | |
" elif u == 'http://nyheteridag.se/?s=' : \n", | |
" posturls = soup.findAll(\"div\", { \"class\" : \"article\" })\n", | |
" for p in posturls:\n", | |
" #print(p.find('a').attrs['href'])\n", | |
" results.append(p.find('a').attrs['href'])\n", | |
" \n", | |
" elif u == 'http://www.friatider.se/search/node/':\n", | |
" posturls = soup.findAll(\"h3\", { \"class\" : \"title\" })\n", | |
" for p in posturls:\n", | |
" #print(p.find('a').attrs['href'])\n", | |
" results.append(p.find('a').attrs['href'])\n", | |
" \n", | |
" elif u == 'http://www.klarvaken.se/?s=':\n", | |
" posturls = soup.findAll(\"span\", { \"class\" : \"posted-on\" })\n", | |
" for p in posturls:\n", | |
" #print(p.find('a').attrs['href'])\n", | |
" results.append(p.find('a').attrs['href'])\n", | |
" \n", | |
" elif u == 'https://www.nordfront.se/?s=':\n", | |
" posturls = soup.findAll(\"h2\", { \"class\" : \"post-title\" })\n", | |
" for p in posturls:\n", | |
" #print(p.find('a').attrs['href'])\n", | |
" results.append(p.find('a').attrs['href'])\n", | |
" \n", | |
" elif u == 'https://www.nyanserat.nu/?s=':\n", | |
" posturls = soup.findAll(\"h2\", { \"class\" : \"post-title\" })\n", | |
" for p in posturls:\n", | |
" #print(p.find('a').attrs['href'])\n", | |
" results.append(p.find('a').attrs['href'])\n", | |
" \n", | |
" elif u == 'http://samtiden.nu/?s=':\n", | |
" posturls = soup.findAll(\"h2\", { \"class\" : \"cb-post-title\" })\n", | |
" for p in posturls:\n", | |
" #print(p.find('a').attrs['href'])\n", | |
" results.append(p.find('a').attrs['href'])\n", | |
"\n", | |
" \n", | |
" return results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['http://nyheteridag.se/rikspolischef-eliasson-kopplar-in-sapo-ska-forhindra-stod-till-sd-i-valrorelsen/',\n", | |
" 'http://nyheteridag.se/fuck-trump-fuck-white-people-livesand-tortyr-av-forstandshandikappad-pa-facebook/',\n", | |
" 'http://nyheteridag.se/utredningen-laggs-ner-rullstolsburen-kvinna-pa-gotland-anmalde-gruppvaldtakt-pa-toalett/',\n", | |
" 'http://nyheteridag.se/timbro-chef-varnar-for-att-moderaterna-kan-gora-katastrofval/',\n", | |
" 'http://avpixlat.info/2016/12/04/snoflingor-och-snoslungor/',\n", | |
" 'http://www.friatider.se/s-topp-hoppar-av-i-protest-mot-migrationsverket',\n", | |
" 'http://www.friatider.se/dn-halvpudlar-om-brandm-n-i-boden',\n", | |
" 'http://www.friatider.se/expressen-profil-hyllar-brevb-rare-som-sl-nger-bort-sd-valsedlar',\n", | |
" 'http://www.friatider.se/svd-kultur-r-tt-f-ngsla-misshagliga-konstn-rer',\n", | |
" 'http://www.friatider.se/dn-h-nas-p-n-tet-efter-brandmansbluff',\n", | |
" 'http://www.friatider.se/dn-lj-g-om-rasism-p-brandstation-i-boden',\n", | |
" 'http://www.friatider.se/ilska-och-gr-t-p-gp-n-r-40-journalister-f-r-sparken',\n", | |
" 'http://www.friatider.se/klart-f-r-le-pens-partigrupp',\n", | |
" 'https://www.nordfront.se/kamparbete-i-are-kommun-2.smr',\n", | |
" 'https://www.nordfront.se/dns-troll-spricker-solen.smr',\n", | |
" 'https://www.nyanserat.nu/dr-frank-salter-om-det-tyska-risktagandet/']" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"searchaltmedia('ljugmedia')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment