Created
February 5, 2016 08:34
-
-
Save amirouche/ef651d20b83f51a53fc7 to your computer and use it in GitHub Desktop.
Load a Wikidata JSON dump into a guile-wiredtiger uav database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(define-module (wikidata)) | |
(use-modules (srfi srfi-41)) | |
(use-modules (ice-9 rdelim)) | |
(use-modules (ice-9 match)) | |
(use-modules (json)) | |
(use-modules (wiredtiger)) | |
(use-modules (wiredtigerz)) | |
(use-modules (uav)) | |
;; Set every locale category from the standard environment variables
;; (LANG and friends) by passing the empty string as the locale name.
(setlocale LC_ALL "")
;; Parse one line of the dump as JSON and return whatever `read-json`
;; produces for it.
;;
;; LINE is expected to hold a single JSON value, optionally followed by
;; the comma that separates it from the next array element.  Only an
;; actual trailing comma is removed: a line without one (presumably the
;; last element of the array -- confirm against the dump format) is
;; parsed intact instead of losing its final character, and an empty
;; line no longer triggers an out-of-range error before parsing.
(define (line->json line)
  (let ((json-text (if (string-suffix? "," line)
                       (string-drop-right line 1)
                       line)))
    ;; `read-json` is handed the current input port, which is rebound to
    ;; a string port over JSON-TEXT for the duration of the thunk.
    (with-input-from-string json-text
      (lambda ()
        (read-json (current-input-port))))))
;; Return a SRFI-41 stream of the entities stored in FILENAME, one per
;; line, each converted with `line->json`.
;;
;; The first line of the file is read and discarded (presumably the
;; opening "[" of the JSON array -- confirm).  The stream ends at the
;; first line that is exactly one character long (presumably the closing
;; "]") or at end of file, whichever comes first; the port is closed at
;; that point, so it stays open until the stream has been consumed up to
;; its end.
(define (entity-stream filename)
  (let ((port (open-file filename "r")))
    ;; Skip the header line.
    (read-line port)
    (letrec ((next
              (stream-lambda ()
                (let ((line (read-line port)))
                  ;; `read-line` yields the EOF object once the file is
                  ;; exhausted; test for it before `string-length`,
                  ;; which only accepts strings.
                  (if (or (eof-object? line)
                          (= (string-length line) 1))
                      (begin (close-port port) stream-null)
                      (stream-cons (line->json line) (next)))))))
      (next))))
;; Module-wide database handles used by the definitions below.
;; "db" is handed straight to `uav-open` (presumably the database
;; location -- confirm); CONTEXT is opened on top of that connection.
(define connection
  (uav-open "db"))

(define context
  (uav-context-open connection))
;; (define (json-ref json key default) | |
;; (let loop ((json json)) | |
;; (if (null? json) | |
;; default | |
;; (if (equal? (caar json) key) | |
;; (cadr json) | |
;; (loop (cdr json)))))) | |
;; Convert one (KEY . VALUE) pair of a JSON object for storage.
;;
;; - When the value is a one-element list whose sole element is a nested
;;   object `(@ ...)`, that object is passed to `entity->uav` and the
;;   result is paired with KEY (the wrapping list is dropped).
;; - Any other value is passed to `entity->uav` as is, which stores it
;;   if it is an object and returns it unchanged otherwise.
(define value->uav
  (match-lambda
    ((key (and nested ('@ _ ...)))
     (cons key (entity->uav nested)))
    ((key . other)
     (cons key (entity->uav other)))))
;; Store ENTITY through `uav-add!` when it is a JSON object.
;;
;; An object is a proper list whose head is the symbol `@`; each of its
;; remaining members is converted with `value->uav` and the resulting
;; list is given to `uav-add!` on the global CONTEXT, whose return value
;; is returned.  Anything else is returned untouched.
(define (entity->uav entity)
  (match entity
    (('@ . (? list? members))
     (uav-add! context (map value->uav members)))
    (anything-else anything-else)))
;; Best-effort import of a single ENTITY.
;;
;; Runs `entity->uav` inside `with-transaction` on the global CONTEXT and
;; prints "X" when that completes.  Any exception raised along the way
;; is caught (catch-all key) and reported by printing "." instead, so
;; one failing entity does not abort the caller.
(define (maybe entity)
  (define (store-and-report)
    (with-transaction context
      (entity->uav entity))
    (display "X"))
  (define (report-failure . _)
    (display "."))
  (catch #t store-and-report report-failure))
;; Return the result of `generate-uid` called with a one-argument
;; predicate that always answers #f.  The CONTEXT argument is accepted
;; but not used by this definition.
(define (make-uid context)
  (define (always-false candidate) #f)
  (generate-uid always-false))
;; Debugging aid: store only the first entity of the dump.
;; (entity->uav (stream-car (entity-stream "/media/amirouche/data/data/wikidata/wikidata-20150824-all.json")))

;; Feed every entity of the dump to `maybe`, then close the connection.
(let ((dump "/media/amirouche/data/data/wikidata/wikidata-20150824-all.json"))
  (stream-for-each maybe (entity-stream dump)))
(connection-close connection)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment