James Hare (harej)

@harej
harej / bot.py
Created March 26, 2023 18:03
Phabricator task creation bot for IABot IRC channel
import irc.client
import requests
import re

from credentials import (
    IRC_SERVER, IRC_CHANNEL, IRC_NICKNAME,
    PHABRICATOR_API_KEY, PHID
)

authorized_hostnames = [
    "user/hare",

Keybase proof

I hereby claim:

  • I am harej on github.
  • I am harej (https://keybase.io/harej) on keybase.
  • I have a public key ASDTkxXZLirRVO3P-J1MWRftp-qZeodb4Aeq9GZSazULIwo

To claim this, I am signing this object:

import requests
import json
import sys
from multiprocessing.dummy import Pool as ThreadPool
from wikidataintegrator import wdi_core, wdi_login
from wikidataintegrator.wdi_core import WDItemEngine
mediawiki_api_url = 'https://iagraph.wiki.opencura.com/w/api.php'
sparql_endpoint_url = 'https://iagraph.wiki.opencura.com/query/sparql'
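# (Preview truncated here. With WikidataIntegrator, these endpoint URLs would
# typically be fed to a login object and an item engine, roughly as below;
# the username, password, and item ID are placeholders, not from the gist.)
login = wdi_login.WDLogin(user='ExampleBot', pwd='example-password',
                          mediawiki_api_url=mediawiki_api_url)
item = WDItemEngine(wd_item_id='Q1',
                    mediawiki_api_url=mediawiki_api_url,
                    sparql_endpoint_url=sparql_endpoint_url)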
import requests
import json
from time import sleep
from pprint import pprint
# This is a quick script I came up with for ingesting "munged" Wikidata TTL dumps
# into Amazon Neptune, one at a time, going as fast as possible while respecting
# queue limits.
for i in range(0, 4243):
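    # (The gist is truncated here. Judging from the output shown below, each
    # iteration plausibly does something like this sketch; the endpoint URL
    # is an assumption.)
    filename = 'wikidump-{:09d}.ttl.gz'.format(i + 1)
    print('Processing ' + filename)
    update = 'LOAD <file:///srv/mungeOut/' + filename + '>'
    r = requests.post('https://neptune-host:8182/sparql',  # assumed endpoint
                      data={'update': update})
    if r.status_code != 200:
        pprint(r.text)
    sleep(1)  # crude pacing to respect queue limits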
jh@Scatter-1 ~ % curl -i http://localhost:63342/Cyberbot_II/IABot/www/setup.php\?_ijt=ek248v577c3ch1l8u1c3mq48gb
HTTP/1.1 200 OK
X-Powered-By: PHP/7.2.28
Set-Cookie: IABotManagementConsole=dbgrl4h62b779fr9luufh5qmm4; expires=Sun, 12-Apr-2020 20:54:31 GMT; Max-Age=2592000; path=/Cyberbot_II/IABot/www
Cache-Control: no-store, must-revalidate
server: PhpStorm 2019.3.3
content-length: 9486
set-cookie: Phpstorm-e21bdce2=b191c1a9-572c-4e8a-b862-7d21cf880eae; Max-Age=315360000; Expires=Mon, 11 Mar 2030 20:54:31 GMT; Path=/; HTTPOnly; SameSite=strict
Set-Cookie: IABotManagementConsole=dbgrl4h62b779fr9luufh5qmm4; expires=Sun, 12-Apr-2020 20:54:31 GMT; Max-Age=2592000; path=/Cyberbot_II/IABot/www
# Timeout
Processing wikidump-000001399.ttl.gz
SPARQL-UPDATE: updateStr=LOAD <file:///srv/mungeOut//wikidump-000001399.ttl.gz>
java.util.concurrent.TimeoutException
    at java.util.concurrent.FutureTask.get(FutureTask.java:205)
    at com.bigdata.rdf.sail.webapp.BigdataServlet.submitApiTask(BigdataServlet.java:292)
    at com.bigdata.rdf.sail.webapp.QueryServlet.doSparqlUpdate(QueryServlet.java:460)
    at com.bigdata.rdf.sail.webapp.QueryServlet.doPost(QueryServlet.java:241)
    at com.bigdata.rdf.sail.webapp.RESTServlet.doPost(RESTServlet.java:269)
    at com.bigdata.rdf.sail.webapp.MultiTenancyServlet.doPost(MultiTenancyServlet.java:195)
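Timeouts like the one above are the failure mode the loader loop has to survive. One way to handle them (a sketch, not the gist's actual recovery logic) is to retry the same LOAD with exponential backoff:

import requests
from time import sleep

def load_with_retry(endpoint_url, update, max_tries=5):
    # Retry a SPARQL UPDATE until it returns 200, backing off 1s, 2s, 4s, ...
    for attempt in range(max_tries):
        r = requests.post(endpoint_url, data={'update': update})
        if r.status_code == 200:
            return r
        sleep(2 ** attempt)
    raise RuntimeError('LOAD failed after {} tries'.format(max_tries))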
import random
import requests
from time import sleep
while True:
    # Generate a random 11-character YouTube video ID and test whether it
    # resolves to a real video. (The preview never uses the `sleep` import;
    # the full gist presumably throttles these requests.)
    random_id = ''.join(random.choices('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_', k=11))
    url = 'https://www.youtube.com/watch?v=' + random_id
    r = requests.get(url)
    if r.text.find('This video is unavailable') == -1:
        print(url)
import html
import requests
import threading
class AskPubMed(threading.Thread):
    def __init__(self, threadID, name, packages):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.packages = packages
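    # (Truncated here. A worker like this presumably implements run() against
    # NCBI's E-utilities; esummary is a standard NCBI endpoint, but its use
    # here and the format of `packages` are assumptions, not from the gist.)
    def run(self):
        r = requests.get(
            'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi',
            params={'db': 'pubmed', 'id': ','.join(self.packages),
                    'retmode': 'json'})
        r.raise_for_status()
        self.results = r.json().get('result', {})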
import requests
from bs4 import BeautifulSoup
niosh_mode = False

if niosh_mode:
    seed = "https://query.wikidata.org/sparql?format=json&query=SELECT%20%3Fitem%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP932%20%3Fdummy0%20.%0A%20%20%3Fitem%20wdt%3AP859%20wd%3AQ60346%20.%0A%7D"
else:
    seed = "https://query.wikidata.org/sparql?format=json&query=SELECT%20%3Fitem%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP932%20%3Fdummy0%20.%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP859%20wd%3AQ60346%20%7D%0A%7D"
@harej
harej / sourcemetadata_scraper.py
Created May 26, 2016 09:30
A script that scrapes tools.wmflabs.org/sourcemd
import requests
import time
import csv
from bs4 import BeautifulSoup
def main(sourcefile):
    url_template = "https://tools.wmflabs.org/sourcemd/?id={0}&doit=Check+source"
    with open(sourcefile) as f:
        csvdump = csv.reader(f)
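        # (Preview truncated. The loop presumably feeds each identifier into
        # the URL template and scrapes the response; the one-identifier-per-
        # row layout and the <textarea> output element are assumptions about
        # the tool's page, not taken from the gist.)
        for row in csvdump:
            r = requests.get(url_template.format(row[0]))
            soup = BeautifulSoup(r.text, "html.parser")
            textarea = soup.find("textarea")
            if textarea is not None:
                print(textarea.get_text())
            time.sleep(1)  # be polite to the tool server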