Skip to content

Instantly share code, notes, and snippets.

@amirouche
Created February 5, 2016 08:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amirouche/ef651d20b83f51a53fc7 to your computer and use it in GitHub Desktop.
Save amirouche/ef651d20b83f51a53fc7 to your computer and use it in GitHub Desktop.
Load Wikidata into a guile-wiredtiger uav database
(define-module (wikidata))
(use-modules (srfi srfi-41))
(use-modules (ice-9 rdelim))
(use-modules (ice-9 match))
(use-modules (json))
(use-modules (wiredtiger))
(use-modules (wiredtigerz))
(use-modules (uav))
;; Initialize every locale category (LC_ALL) from the standard environment
;; variables (LANG etc.); the empty string asks for the environment's locale.
;; NOTE(review): presumably needed so the dump's non-ASCII text is decoded
;; correctly when read -- confirm.
(setlocale LC_ALL "")
;; Parse one line of the dump into a Scheme value using `read-json'.
;;
;; LINE is a string holding one JSON document, optionally followed by a
;; single trailing comma (the separator between entities in the dump).
;; The comma is removed only when it is actually there: the last entity
;; line of a dump has no trailing comma, and unconditionally dropping the
;; final character would strip its closing brace and break parsing.
;;
;; Returns whatever `read-json' returns for the document.
(define (line->json line)
  (let ((document (if (string-suffix? "," line)
                      (string-drop-right line 1)
                      line)))
    (with-input-from-string document
      (lambda ()
        (read-json (current-input-port))))))
;; Return a lazy SRFI-41 stream of the entities stored in FILENAME.
;;
;; The first line of the file is read and discarded eagerly (it is not
;; an entity).  Each following line is parsed with `line->json' when the
;; stream is forced.  The stream ends, and the port is closed, when either
;;   * a one-character line is read (the closing line of the dump), or
;;   * end of file is reached.  `read-line' then returns the EOF object,
;;     which must be tested before calling `string-length' on it;
;;     otherwise a truncated dump raises an error and leaks the port.
(define (entity-stream filename)
  (let ((port (open-file filename "r")))
    ;; Skip the opening line of the dump.
    (read-line port)
    (stream-let next ()
      (let ((line (read-line port)))
        (if (or (eof-object? line)
                ;; Numeric comparison: `=' rather than `eq?'.
                (= (string-length line) 1))
            (begin
              (close-port port)
              stream-null)
            (stream-cons (line->json line) (next)))))))
;; Module-level database handles shared by the procedures below.
;; `connection' is what `uav-open' returns for "db"
;; (NOTE(review): presumably a database directory path -- confirm).
(define connection (uav-open "db"))
;; `context' is opened on that connection; it is the handle passed to
;; `uav-add!' and `with-transaction' further down.
(define context (uav-context-open connection))
;; (define (json-ref json key default)
;; (let loop ((json json))
;; (if (null? json)
;; default
;; (if (equal? (caar json) key)
;; (cadr json)
;; (loop (cdr json))))))
;; Convert one (key . value) pair by passing its value through
;; `entity->uav' and pairing the result with the same key.
;;
;; Two shapes are recognised:
;;   * the value is a one-element list whose element is an '@-tagged
;;     list: that inner list is rebuilt and converted on its own
;;     (NOTE(review): presumably a JSON array holding one object -- confirm);
;;   * anything else: the value is handed to `entity->uav' unchanged.
(define (value->uav pair)
  (match pair
    ((name ('@ fields ...))
     (let ((inner (cons '@ fields)))
       (cons name (entity->uav inner))))
    ((name . other)
     (cons name (entity->uav other)))))
;; Store ENTITY through `uav-add!' when it is an '@-tagged list.
;;
;; For a list of the form (@ member ...), every member is converted with
;; `value->uav' and the resulting list is handed to `uav-add!' together
;; with the module-level `context'; the result of `uav-add!' is returned.
;; Any other value is returned untouched.
(define entity->uav
  (match-lambda
    (('@ members ...)
     (let ((converted (map value->uav members)))
       (uav-add! context converted)))
    (other other)))
;; Best-effort import of a single ENTITY with a one-character progress mark.
;;
;; Runs `entity->uav' inside `with-transaction' on the module-level
;; `context'.  When that form returns normally "X" is displayed; when
;; anything at all is thrown (the catch key is #t) the exception is
;; swallowed and "." is displayed instead.
(define (maybe entity)
  (let ((attempt (lambda ()
                   (with-transaction context
                     (entity->uav entity))
                   (display "X")))
        (on-throw (lambda _
                    (display "."))))
    (catch #t attempt on-throw)))
;; Return the result of `generate-uid' called with a one-argument
;; procedure that always answers #f.  The CONTEXT argument is accepted
;; but not used.
;; NOTE(review): the procedure presumably tells `generate-uid' whether a
;; candidate is already taken, so "always #f" means no collision check --
;; confirm against `generate-uid'.
(define (make-uid context)
  (let ((always-false (lambda (_) #f)))
    (generate-uid always-false)))
;; (entity->uav (stream-car (entity-stream "/media/amirouche/data/data/wikidata/wikidata-20150824-all.json")))
;; Driver: apply `maybe' to every entity of the dump, in order, then
;; close the database connection.
(let ((dump-file "/media/amirouche/data/data/wikidata/wikidata-20150824-all.json"))
  (stream-for-each maybe (entity-stream dump-file)))
(connection-close connection)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment