María A. Matienzo anarchivist

## iii_calc_checkdigit.rb
# Take a string containing an III record number, without the record-type prefix
# and without the check digit, and calculate its check digit.
def calc_check_digit(digits)
  digit_seq = digits.split.reverse
  sum = 0
  multiplier = 2
  digit_seq.each { |digit|
    sum += digit.to_i * multiplier
    multiplier += 1
  }

## apachesolr_extract_text.module
/**
 * Extracts text using Tika deployed from within Solr. Assumes the following
 * about your Solr config:
 * - ExtractingRequestHandler lives at (solr URL)/extract, not /update/extract
 * - ExtractingRequestHandler is set to extract only (ext.extract.only=true)
 * - ExtractingRequestHandler only returns text within the body tags of the
 *   XHTML response (ext.xpath=/xhtml:html/xhtml:body/descendant:node())
 *
 * @param $path
 *   string containing path of file to have text extracted

## resource_map_validator.py
# from mjgiarlo's OAI-ORE validator code:
# http://lackoftalent.org/michael/blog/2009/07/31/validating-ore-from-the-command-line/
# to use: python validate.py {URL}

import sys
from foresite import *

# Parse the resource map document named by the first command-line argument
# (the header comment above gives the usage as `python validate.py {URL}`).
rem = RdfLibParser().parse(ReMDocument(sys.argv[1]))
# Keep a handle on the aggregation exposed by the parsed resource map.
aggr = rem.aggregation
# Build a serializer constructed with the 'n3' format name.
n3 = RdfLibSerializer('n3')

## iii_to_unicode.php
<?php

/*
Changes III's unicode brackets into encoded unicode characters.

We have found this more reliable than using the pre-encoded values from the interface. The XRecord will give the bracket output.

This presumes that your III database is stored in Unicode. III can convert your database to Unicode for you; call the helpdesk.

If you see codes like {231} without the 'u', then you either haven't converted your database or are entering them in the old format.

## gist:255248
# loop over a series of MARC files to extract record numbers, with hints to figure out which records are missing them
import pymarc
c = 0
w = open('ids','w')
for f in ('hspall', 'lcpraream', 'lcprarenot', 'lcpnotrare'):
  r = pymarc.MARCReader(file(f))
  for _ in r:
    c += 1
    try:
      w.write(_['001'].format_field() + '\n')

## gist:259281
from pymarc import MARCReader, Record, Field

class AlephSequentialReader(MARCReader):
    """
    An iterator class for reading a file of MARC records in Aleph Sequential
    format, which subclasses pymarc's MARCReader. Based on Tim Prettyman's
    MARC::File::AlephSeq Perl code.

    """
    def __init__(self, marc_target):

## gist:261451
#!/usr/bin/env python
"""ln_s.py: Use the ln-s.net URL redirector
Mark Matienzo - October 2007"""

import getopt, re, sys, urllib, urllib2

# URL of the ln-s.net API page (the module docstring describes this script as
# a client for the ln-s.net URL redirector).
_api_url = 'http://ln-s.net/home/api.jsp'
# Hint text pointing the user at the -h / --help options.
# NOTE(review): the comma after "use" looks like a typo; left as-is because it
# is a runtime string.
_help_msg = 'For help, use, -h or --help'
# Message text for URLs that do not start with "http://" or "https://".
_invalid_url = 'Invalid URL (must start with either "http://" or "https://")'

## gist:261457
import csv
import re
import time
import string
import sys
import urllib
import urllib2

import pymarc

## off-campus-housing-feed.py
#!/usr/bin/env python

# Gets listings from Yale University's off-campus housing site and generates
# feeds, if you're into that sort of thing.

import sys
import time
import urllib2

from BeautifulSoup import BeautifulSoup

## marc-tags.py
#!/usr/bin/env python

# originally by edsu - original at http://inkdroid.org/bzr/bin/marc-tags

import pymarc
import sys

stats = {}

def tally(r):
	# take an string w/ an iii record number without recordtype prefix
	# and without check digit and calculate its check digit
	def calc_check_digit(digits)
	digit_seq = digits.split.reverse
	sum = 0
	multiplier = 2
	digit_seq.each { \|digit\|
	sum += digit.to_i * multiplier
	multiplier += 1
	}
	/**
	* Extracts text using Tika deployed from within Solr. Assumes the following
	* about your Solr config:
	* - ExtractingRequestHandler lives at (solr URL)/extract, not /update/extract
	* - ExtractingRequestHandler is set to extract only (ext.extract.only=true)
	* - ExtractingRequestHandler only returns text within the body tags of the
	* XHTML response (ext.xpath=/xhtml:html/xhtml:body/descendant:node())
	*
	* @param $path
	* string containing path of file to have text extracted
	# from mjgiarlo's OAI-ORE validator code:
	# http://lackoftalent.org/michael/blog/2009/07/31/validating-ore-from-the-command-line/
	# to use: python validate.py {URL}

	import sys
	from foresite import *

	rem = RdfLibParser().parse(ReMDocument(sys.argv[1]))
	aggr = rem.aggregation
	n3 = RdfLibSerializer('n3')
	<?php

	/*
	Changes III's unicode brackets into encoded unicode characters.

	We have found this more reliable than using the pre-encoded values from the interface. The XRecord will give the bracket output.

	This presumes you have your database in unicode in iii. iii can convert your database to unicode for you. call the helpdesk

	If you see codes like {231} without the 'u' then you haven't converted or are entering them in old format
	# loop over a series of MARC files to extract record numbers, with hints to figure out which records are missing them
	import pymarc
	c = 0
	w = open('ids','w')
	for f in ('hspall', 'lcpraream', 'lcprarenot', 'lcpnotrare'):
	r = pymarc.MARCReader(file(f))
	for _ in r:
	c += 1
	try:
	w.write(_['001'].format_field() + '\n')
	from pymarc import MARCReader, Record, Field

	class AlephSequentialReader(MARCReader):
	"""
	An iterator class for reading a file of MARC records in Aleph Sequential
	format, which subclasses pymarc's MARCReader. Based on Tim Prettyman's
	MARC::File::AlephSeq Perl code.

	"""
	def __init__(self, marc_target):
	#!/usr/bin/env python
	"""ln_s.py: Use the ln-s.net URL redirector
	Mark Matienzo - October 2007"""

	import getopt, re, sys, urllib, urllib2

	_api_url = 'http://ln-s.net/home/api.jsp'
	_help_msg = 'For help, use, -h or --help'
	_invalid_url = 'Invalid URL (must start with either "http://" or "https://")'
	import csv
	import re
	import time
	import string
	import sys
	import urllib
	import urllib2

	import pymarc
	#!/usr/bin/env python

	# Gets listings from Yale University's off-campus housing site and generates
	# feeds, if you're into that sort of thing.

	import sys
	import time
	import urllib2

	from BeautifulSoup import BeautifulSoup
	#!/usr/bin/env python

	# originally by edsu - original at http://inkdroid.org/bzr/bin/marc-tags

	import pymarc
	import sys

	stats = {}

	def tally(r):