Skip to content

Instantly share code, notes, and snippets.

@0x48piraj
Created March 8, 2020 14:23
Show Gist options
  • Save 0x48piraj/b982d465cbc4e0521fc1afd2d9250af7 to your computer and use it in GitHub Desktop.
Save 0x48piraj/b982d465cbc4e0521fc1afd2d9250af7 to your computer and use it in GitHub Desktop.
Extracting Gizmodo journalists in #cybersecurity
# https://gizmodo.com/tag/cybersecurity?startIndex=20
import requests
import re
# Tag listing URL; the page offset is appended as the ``startIndex`` value.
TAG_URL = "https://gizmodo.com/tag/cybersecurity?startIndex="

# Captures whatever follows "kinja.com/" up to the next double quote.
# Compiled once instead of per page; the dot is escaped so it only matches
# a literal "kinja.com".
HANDLE_RE = re.compile(r'(?<=kinja\.com/)[^"]*')

# Only matches containing this HTML-escaped `",` sequence are kept; the
# handle is the text in front of it.
HANDLE_END = "&quot;,"

# Seconds to wait for a response; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30


def extract_handles(html):
    """Return the handles found in *html*, unique and in first-seen order.

    A candidate is the text after ``kinja.com/`` up to the next double
    quote. It is kept only if it contains ``&quot;,``; the handle is the
    part before that marker. Candidates without the marker are ignored.
    """
    handles = {}
    for candidate in HANDLE_RE.findall(html):
        handle, marker, _ = candidate.partition(HANDLE_END)
        if marker:
            # dict keys give de-duplication with a deterministic order.
            handles[handle] = None
    return list(handles)


def fetch_page(start_index):
    """Download the tag listing at offset *start_index* and return its text."""
    response = requests.get(TAG_URL + str(start_index), timeout=REQUEST_TIMEOUT)
    # Use the decoded body: str(response.content) would yield the bytes
    # repr ("b'...'") with escaped newlines and non-ASCII bytes.
    return response.text


def collect_handles(start_indexes, echo=False):
    """Fetch each offset in *start_indexes* and return all handles found.

    Handles are unique within a page but may repeat across pages. When
    *echo* is true every handle is also printed as it is found.
    """
    collected = []
    for start_index in start_indexes:
        page_handles = extract_handles(fetch_page(start_index))
        if echo:
            for handle in page_handles:
                print(handle)
        collected.extend(page_handles)
    return collected


# Every handle seen, page by page (may contain repeats across pages).
lst = []
if __name__ == "__main__":
    # Pages of 20 items, offsets 20..480.
    # NOTE(review): offset 0 (the first page) is never requested - confirm
    # that this is intended.
    lst.extend(collect_handles(range(20, 481, 20)))
    # NOTE(review): offsets 481..494 are each requested individually and
    # echoed, exactly as the original did; presumably this picks up the
    # final partial page - confirm whether a single request would suffice.
    lst.extend(collect_handles(range(481, 495), echo=True))

# Unique handles, sorted so the result is reproducible between runs.
writers = sorted(set(lst))
# writers = ['jack-loftus-old', 'caseychan', 'kylenw', 'liptakaa', 'rosa', 'chengela', 'mattnovak', 'seanhollister', 'sidneyfussell', 'maddiestone', 'ajdellinger', 'andrewcouts', 'sambiddle', 'catiekeck', 'laurendavis', 'kevin-lee-old', 'tjenningsbrown', 'darrenorf', 'alejandroalba', 'chris-mills', 'dan-nosowitz-old', 'fruitsoftheweb', 'ericlimer', 'robschoon', 'davidnield', 'conger', 'kcampbelldollaghan', 'rmisra', 'kyle-vanhemert-old', 'rtgonzalez', 'rhettjonesgizmodo', 'hudsonhongo', 'williamturton', 'john-herrman-old', 'leahbecerra', 'michaelfnunez', 'alexcranz', 'bryanlufkin', 'ace', 'estheringlis-arkell', 'acovert31', 'evepeyser', 'robertsorokanich', 'georgedvorsky', 'carlivelocci', 'jcondliffe', 'sophiekleeman', 'sean-fallon-old', 'knibbs', 'tommckay', 'dellcam', 'libbywatson', 'Mark-Strauss', 'nicolewetsman', 'melaniehannah', 'annaleenewitz', 'ashleyfeinberg']
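# To open every writer's profile in a browser (requires `import webbrowser`):
# for writer in writers:
#     webbrowser.open("https://kinja.com/{}".format(writer))
# Sample profile-sidebar HTML for the writer "fruitsoftheweb", kept for
# reference only; the string literal below is never used by the script.
"""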
<div class="large-4 columns sidebar"><div class="sidebar-container js_sidebar-actual-container"><div class="secondary-nav-btn-wrapper hide-for-large-up js_secondary-nav-btn-wrapper transition"><a class="secondary-nav__item secondary-nav__item--inline js_toggle-secondary-nav icon--svg u-darkened">Posts<svg class="svg-icon svg-chevron chevron--bottom"><use xlink:href="#iconset-chevron-bottom"></use></svg></a></div><div itemscope="" itemtype="http://schema.org/Person" class="sidebar-header row"><div class="column"><div class="row"><div class="column"><div class="profile-avatar" style="background-image: url(https://i.kinja-img.com/gawker-media/image/upload/s--v_un4kjb--/c_fill,fl_progressive,g_center,h_200,q_80,w_200/byofaowfchgc8oisypcx.jpg)"></div><img itemprop="image" src="https://i.kinja-img.com/gawker-media/image/upload/s--v_un4kjb--/c_fill,fl_progressive,g_center,h_200,q_80,w_200/byofaowfchgc8oisypcx.jpg" style="display:none;"></div></div><div class="row"><div class="column"><div itemprop="name" class="profile-displayname text-center">Bryan Menegus</div><div itemprop="alternateName" class="profile-screenname text-center"><svg class="svg-icon svg-user"><use xlink:href="#iconset-user"></use></svg>fruitsoftheweb</div><div class="follow-controls follow-controls--profile js_user-follow-controls" data-authorid="5876237249236384217"><a href="#" data-analytics-target="profileLeftBar" data-action="follow" class="js_followforuser button small list-entity__button--follow icon--svg list-control " data-ga="[Profile Page, Follow Link Click, js_pageType]"><span>Follow</span><svg class="svg-icon small svg-add--small"><use xlink:href="#iconset-add--small"></use></svg></a><a href="#" data-analytics-target="profileLeftBar" data-action="unfollow" class="js_unfollowforuser button small list-entity__button--following icon--svg list-control following list-control--active hide"><span>Following</span><svg class="svg-icon small svg-checkmark--small"><use 
xlink:href="#iconset-checkmark--small"></use></svg></a></div></div></div><div class="row"><div class="column"><div class="user-details"><p class="user-details__bio">Senior reporter
</p><div class="user-details__links"><a class="user-details__twitter user-details--flair" href="https://twitter.com/BryanDisagrees" title="Twitter Handle" target="_blank"><svg class="svg-icon svg-icon--filled svg-twitter"><use xlink:href="#iconset-twitter"></use></svg>@BryanDisagrees</a></div><hr><div class="user-details__crypto"><p>PGP Fingerprint: 1905 9104 D967 2EB7 C3F5 68F9 9108 1434 C917 C1B9</p></div></div></div></div><div class="row"><div class="column sidebar-follow small-6 text-center"><a href="/fruitsoftheweb/followers" class="sidebar-follow__link">Followers (447)</a></div><div class="column sidebar-follow small-6 text-center"><a href="/fruitsoftheweb/following" class="sidebar-follow__link">Following (3)</a></div></div></div></div><div class="row hide-for-medium-down"><div class="column"><ul class="sidebar-nav__list"><li class="sidebar-nav__item active"><a class="sidebar-nav__link" href="/fruitsoftheweb/posts">Posts<svg class="svg-icon svg-chevron chevron--right"><use xlink:href="#iconset-chevron-right"></use></svg></a></li><li class="sidebar-nav__item"><a class="sidebar-nav__link" href="/fruitsoftheweb/discussions">Discussions<svg class="svg-icon svg-chevron chevron--right"><use xlink:href="#iconset-chevron-right"></use></svg></a></li><li class="sidebar-nav__item"><a class="sidebar-nav__link" href="/fruitsoftheweb/saved">Saved Articles<svg class="svg-icon icon-bookmark"><use xlink:href="#iconset-bookmark"></use></svg><svg class="svg-icon svg-chevron chevron--right"><use xlink:href="#iconset-chevron-right"></use></svg></a></li></ul></div></div></div></div>4
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment