Matt Miller thisismattmiller

## gist:f842a25cde30cc01c701
var csv = require("fast-csv"),
	fs = require("fs"),
	readable = require('stream').Readable,
	jsonStream = require('JSONStream'),
	viaf = require("viaf-wrapper");


var stream = fs.createReadStream("perscorp-collection.csv");
var line = 0

## gist:736f4dc24ee5fd3eb30acd90b6816b20
import csv

#ask the CSV file to be opened
csv_file = csv.reader(open("Art Donahue data set - Sheet1.csv"), delimiter=",")

#skip the header
next(csv_file, None)

#loop over each line and read each field
for a_row in csv_file:

## gist:7cee9c6eb5db008720839f390ba5fad6
import csv
from rdflib import Graph, URIRef, Literal

#ask the CSV file to be opened
csv_file = csv.reader(open("Art Donahue data set - Sheet1.csv"), delimiter=",")


#skip the header
next(csv_file, None)

## parse.py
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString

pretty_print = lambda data: '\n'.join([line for line in parseString(data).toprettyxml(indent=' '*2).split('\n') if line.strip()])

def remove_namespace(doc, namespace):
  """Remove namespace in the passed document in place."""
  ns = u'{%s}' % namespace
  nsl = len(ns)
  for elem in doc.getiterator():

## bib
{
    "author": "Klopstock, Friedrich Gottlieb, 1724-1803.",
    "bibLevel": {
        "code": "m",
        "value": "MONOGRAPH"
    },
    "catalogDate": "2000-12-13",
    "country": {
        "code": "gw ",
        "name": "Germany"

## item
[
  {
    "_id": 10067442,
    "id": 10067442,
    "updatedDate": "2014-08-13T18:36:09Z",
    "createdDate": "2009-02-03T01:48:48Z",
    "deleted": false,
    "bibIds": [
      10291849
    ],

## moby.json
{
    "_id": "569f4bfb9923be304b6d3045",
    "classify:holdings": [
        {
            "objectLiteral": 1404,
            "objectLiteralType": "xsd:integer",
            "objectUri": null,
            "provo": {
                "created": "2016-01-20T08:57:23.668Z",
                "creator": "RI",

## resource.jsonld
{
  "@context": {
    "agents": "http://data.nypl.org/agents/",
    "bf": "http://bibframe.org/vocab/",
    "classify": "http://purl.org/library/",
    "data": "http://data.nypl.org/datasets/",
    "db": "https://NEEDTOFIX.org/what-is-db/",
    "dbo": "http://www.dbpedia.org/ontology/",
    "dbr": "http://www.dbpedia.org/resource/",
    "dcterms": "http://purl.org/dc/terms/",

## agent_search.jsonld
{
  "@context": "http://data.nypl.org/context_all.jsonld",
  "@type": "itemList",
  "itemListElement": [
    {
      "@type": "searchResult",
      "result": {
        "@type": [
          "edm:Agent",
          "foaf:Person"

## README.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                thisismattmiller
                / README.md
            
            
              Last active
              May 24, 2016 22:36
            
              
                Convert MARC XML from Sierra export table into Shared Collection format for the sample 10K data load
              
          
    To run the script:

You need Python 3.4 or later.
Download recap_sample_convert.py.
Run the script, passing the data file name as its argument: `python3.4 recap_sample_convert.py example.xml`
Here example.xml is the data file exported from MarcEdit.

The script writes the converted output to a new file in the same directory, using the same filename with "_converted.xml" appended.
	var csv = require("fast-csv"),
	fs = require("fs"),
	readable = require('stream').Readable,
	jsonStream = require('JSONStream'),
	viaf = require("viaf-wrapper");


	var stream = fs.createReadStream("perscorp-collection.csv");
	var line = 0
	import csv

	#ask the CSV file to be opened
	csv_file = csv.reader(open("Art Donahue data set - Sheet1.csv"), delimiter=",")

	#skip the header
	next(csv_file, None)

	#loop over each line and read each field
	for a_row in csv_file:
	import csv
	from rdflib import Graph, URIRef, Literal

	#ask the CSV file to be opened
	csv_file = csv.reader(open("Art Donahue data set - Sheet1.csv"), delimiter=",")


	#skip the header
	next(csv_file, None)
	import xml.etree.ElementTree as ET
	from xml.dom.minidom import parseString

	pretty_print = lambda data: '\n'.join([line for line in parseString(data).toprettyxml(indent=' '*2).split('\n') if line.strip()])

	def remove_namespace(doc, namespace):
	"""Remove namespace in the passed document in place."""
	ns = u'{%s}' % namespace
	nsl = len(ns)
	for elem in doc.getiterator():
	{
	"author": "Klopstock, Friedrich Gottlieb, 1724-1803.",
	"bibLevel": {
	"code": "m",
	"value": "MONOGRAPH"
	},
	"catalogDate": "2000-12-13",
	"country": {
	"code": "gw ",
	"name": "Germany"
	[
	{
	"_id": 10067442,
	"id": 10067442,
	"updatedDate": "2014-08-13T18:36:09Z",
	"createdDate": "2009-02-03T01:48:48Z",
	"deleted": false,
	"bibIds": [
	10291849
	],
	{
	"_id": "569f4bfb9923be304b6d3045",
	"classify:holdings": [
	{
	"objectLiteral": 1404,
	"objectLiteralType": "xsd:integer",
	"objectUri": null,
	"provo": {
	"created": "2016-01-20T08:57:23.668Z",
	"creator": "RI",
	{
	"@context": {
	"agents": "http://data.nypl.org/agents/",
	"bf": "http://bibframe.org/vocab/",
	"classify": "http://purl.org/library/",
	"data": "http://data.nypl.org/datasets/",
	"db": "https://NEEDTOFIX.org/what-is-db/",
	"dbo": "http://www.dbpedia.org/ontology/",
	"dbr": "http://www.dbpedia.org/resource/",
	"dcterms": "http://purl.org/dc/terms/",
	{
	"@context": "http://data.nypl.org/context_all.jsonld",
	"@type": "itemList",
	"itemListElement": [
	{
	"@type": "searchResult",
	"result": {
	"@type": [
	"edm:Agent",
	"foaf:Person"