Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python3
# Usage: ./run_bert.py test.txt
# Output: test.txt.bert
import sys
import requests
from pprint import pprint
#!/bin/env python3
import requests
import sys
import os
# Bail out early when no article identifier or input file was given on the
# command line (sys.argv[0] is the script name itself).
if len(sys.argv) < 2:
    print("Usage: fetch_article.py ddd:010618043:mpeg21:a0339, or fetch_article <FILE>.txt")
    # Exit with a non-zero status so calling shells and scripts can detect the misuse.
    sys.exit(-1)
@WillemJan
WillemJan / lookup_paper
Created November 25, 2020 09:44
Solr VS Oracle timing
#!/usr/bin/env python3
import requests
from lxml import etree
OAI_BASEURL = 'http://services.kb.nl/mdo/oai'
SRU_BASEURL = 'http://jsru.kb.nl/sru/sru'
EXAMPLE = "http://resolver.kb.nl/resolve?urn=ddd:010018364:mpeg21:a0232:ocr"
#!/usr/bin/env python
import random
import pygame
from pygame.locals import *
import time
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import os
import requests
import feedparser
import lxml.html
import datetime
#!/usr/bin/env python3
import requests
from lxml import etree
OAI_BASEURL = 'http://services.kb.nl/mdo/oai'
SRU_BASEURL = 'http://jsru.kb.nl/sru/sru'
EXAMPLE = "http://resolver.kb.nl/resolve?urn=ddd:010018364:mpeg21:a0232:ocr"
def get_paper_date_sru(identifier=EXAMPLE):
#!/usr/bin/env python3
import os
import json
from SPARQLWrapper import SPARQLWrapper, JSON
def load_brinkman():
if not os.path.isfile("brinkman.json"):
sparql = SPARQLWrapper("http://data.bibliotheken.nl/sparql")
@WillemJan
WillemJan / fetch_nr_of_pages_new.py
Created October 3, 2018 12:33
ocr_onderzoek/bin$ cat fetch_nr_of_pages_new.py
#!/usr/bin/env python
import json
import os
import time
import urllib
import sys
from lxml import etree as ET
from pprint import pprint
In [4]: from nltk import NaiveBayesClassifier as nbc
...: from nltk.tokenize import word_tokenize
...: from itertools import chain
...:
...: training_data = [('I love this sandwich.', 'pos'),
...: ('This is an amazing place!', 'pos'),
...: ('I feel very good about these beers.', 'pos'),
...: ('This is my best work.', 'pos'),
#!/usr/bin/env bash
#
# Update the baseurl for new files.
#
# http://wiki.dbpedia.org/dataset-categories/dbpedia-release
#
DUMP_DATE="current"
if [ ! -d "$DUMP_DATE" ]; then