yo-bj/sersolLocalHoldingsCSV Secret

## sersolLocalHoldingsCSV
#!/usr/bin/env python

import csv
from pymarc import MARCReader
from os import listdir
from re import search
import csv_splitter
import time
import smtplib
from email.mime.text import MIMEText

# change this line to match your folder structure
SRC_DIR = '/home/libadmin/sersoltest'

# get a list of all .out files in source directory - I'm leaving this as .out for our local workflow, but you can do .mrc as well
file_list = filter(lambda x: search('.out', x), listdir(SRC_DIR))

# note 'wb' and 'rb' below are due to this script running in Windows. If you are running this script in a Linux environment you can probably do away with the 'b' part
csv_out = csv.writer(open('marc_records.csv', 'w'), delimiter = '*', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
# type out header row
csv_out.writerow(['title', 'type', 'URL', 'publisher', 'pubdate', 'public note', 'display pubnote', 'location note', 'display locnote', 'ISSN', 'coverage begin',  'coverage end', 'ISBN', 'author', 'editor', 'edition', 'language ID'])


for item in file_list:
  fd = file(SRC_DIR + '/' + item, 'r')
  reader = MARCReader(fd)
  for record in reader:

    # check to see if record is not suppressed
    bcode3 = ''
    bcode3 = record['998']['e']

    # check to see if record is not suppressed
    catdate = ''
    catdate = record['907']['c']

    # check to see if location code is not part of curriculum, reserve, order, crl, and other fun things...
    # loops for multiple location codes
    locationdata = locations = ''
    moveon = 0
    for locations in record.get_fields('998'):
      locationdata = locations.get_subfields('a')[0]
      if locationdata in ('order', 'currs', 'crl', 'ersrv', 'curre', 'stone'):
        moveon = 1
        break
      if locationdata == 'www':
        recordURL = ''
        recordURL = record['856']['u']
        # substring to find 'http://jy3ke7sv3s.search.serialssolutions.com', conditional to test if True
        if recordURL.startswith('http://jy3ke7sv3s.search.serialssolutions.com'):
          moveon = 1
          break
        # ? next test - ebrary print DDA should not be included. ?

    # time to do ALL the checks
    if bcode3 != '-' or catdate == '' or  moveon == 1:
      continue #moves on to the next record

    # reset ALL the variables... ok, just the ones used for data entry into the file.
    bibLvl = type = isbn = issn = title = author = date = recordNumber = publisher = edition = ''

    # determine if record should be treated as a "monograph" [m] or "serial" [s or i] in terms of what to pull from the record using biblvl field
    bibLvl = record['998']['c']

    # some notes
    # ISBN/ISSN - we're grabbing the first entry of the first 02X field; hence not trying to sort through all the 02X fields in one record (if multiple fields exist)

    if bibLvl in ('m', 'a'):
      type = 'Book'
      if record['020'] is not None:
       isbn = record.isbn()
      if record['250'] is not None:
       edition = record['250']['a']
      if record['100'] is not None:
       author = record.author()
      if record['250'] is not None:
       edition = record['250']['a']
    elif bibLvl in ('s', 'i', 'b'):
      type = 'Journal'
      if record['022'] is not None:
       issn = record['022']['a']

    # author
    if record['100'] is not None:
      author = record.author()
    elif record['110'] is not None:
      author = record['110']['ab']
    elif record['700'] is not None:
      author = record['700']['ab']
    elif record['710'] is not None:
      author = record['710']['ab']

    # title: 229 for serials, 245 for gov docs/others
    if bibLvl == 's':
      if record['229'] is not None:
        title = record['229']['a']
      else:
        title = record.title()
    else:
      if record['245'] is not None:
        title = record.title()
      # nonetype object attribute error forced me to comment the strip below. Gah.
        # title = title.rstrip('/')

    # publisher - since records in our db have pub info in 260 OR 264, I'm pulling this info out manually instead of using record.publisher()
    if record['260'] is not None:
      publisher = record['260']['b']
      date = record.pubyear()
    elif record['264'] is not None:
      publisher = record['264']['b']
      # date = record['264']['c']

    # record number
    if record['907'] is not None:
      recordNumber = "https://cat.lib.grinnell.edu/record=" + record['907']['a'].replace('.', '')[:-1]

    # time to clean up punctuation and initial articles!
    if title is not None:
      title = title.rstrip('/.')
      nonFiling = record['245'].indicators[1]
      nonFiling = int(nonFiling)
      title = title[nonFiling:]
      title = title.encode("utf-8")

    if author is not None:
      author = author.rstrip(',.')
      author = author.encode("utf-8")

    if publisher is not None:
      publisher = publisher.rstrip(';,.')
      publisher = publisher.encode("utf-8")

    if date is not None:
      date = date.rstrip(',.')
      date = date.encode("utf-8")

    if edition is not None:
      edition = edition.rstrip(',.')
      edition = edition.encode("utf-8")

    # order for spreadsheet, what fields correspond to what variables, and what fields are left blank because I'm lazy:
    # title > title
    # type >
    # URL > recordNumber
    # publisher > publisher
    # pubdate > date
    # public note > ''
    # display pubnote > ''
    # location note > ''
    # display locnote > ''
    # ISSN > isn
    # coverage begin > ''
    # coverage end > ''
    # ISBN > isn
    # author > author
    # editor > '' [yes, I know that we are putting editors in the author field. Deal with it or submit a PR.]
    # edition > edition
    # language ID > ''
    # alphabetization > ''

    #csv_out.writerow([isn, title, author, date, recordNumber, publisher, edition, locationdata])
    csv_out.writerow([title, type, recordNumber, publisher, date, '', '', '', '', issn, '',  '', isbn, author, '', edition, '', ''])
  fd.close()

# split out the huge CSV file - 100000 rows per output file
csv_splitter.split(open('marc_records.csv', 'r'))

# time to send the email saying that the csv file is ready to download

textfile = '/home/libadmin/sersoltest/downloadready.txt'
me = 'server_email_here'
you = 'your_email_here'

# Open a plain text file for reading.  For this example, assume that
# the text file contains only ASCII characters.
fp = open(textfile, 'rb')
# Create a text/plain message
msg = MIMEText(fp.read())
fp.close()

msg['Subject'] = 'The contents of %s' % textfile
msg['From'] = me
msg['To'] = you

# Send the message via our own SMTP server, but don't include the
# envelope header.
s = smtplib.SMTP('server_address')
s.sendmail(me, [you], msg.as_string())
s.quit()
	#!/usr/bin/env python

	import csv
	from pymarc import MARCReader
	from os import listdir
	from re import search
	import csv_splitter
	import time
	import smtplib
	from email.mime.text import MIMEText

	# change this line to match your folder structure
	SRC_DIR = '/home/libadmin/sersoltest'

	# get a list of all .out files in source directory - I'm leaving this as .out for our local workflow, but you can do .mrc as well
	file_list = filter(lambda x: search('.out', x), listdir(SRC_DIR))

	# note 'wb' and 'rb' below are due to this script running in Windows. If you are running this script in a Linux environment you can probably do away with the 'b' part
	csv_out = csv.writer(open('marc_records.csv', 'w'), delimiter = '*', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
	# type out header row
	csv_out.writerow(['title', 'type', 'URL', 'publisher', 'pubdate', 'public note', 'display pubnote', 'location note', 'display locnote', 'ISSN', 'coverage begin', 'coverage end', 'ISBN', 'author', 'editor', 'edition', 'language ID'])


	for item in file_list:
	fd = file(SRC_DIR + '/' + item, 'r')
	reader = MARCReader(fd)
	for record in reader:

	# check to see if record is not suppressed
	bcode3 = ''
	bcode3 = record['998']['e']

	# check to see if record is not suppressed
	catdate = ''
	catdate = record['907']['c']

	# check to see if location code is not part of curriculum, reserve, order, crl, and other fun things...
	# loops for multiple location codes
	locationdata = locations = ''
	moveon = 0
	for locations in record.get_fields('998'):
	locationdata = locations.get_subfields('a')[0]
	if locationdata in ('order', 'currs', 'crl', 'ersrv', 'curre', 'stone'):
	moveon = 1
	break
	if locationdata == 'www':
	recordURL = ''
	recordURL = record['856']['u']
	# substring to find 'http://jy3ke7sv3s.search.serialssolutions.com', conditional to test if True
	if recordURL.startswith('http://jy3ke7sv3s.search.serialssolutions.com'):
	moveon = 1
	break
	# ? next test - ebrary print DDA should not be included. ?

	# time to do ALL the checks
	if bcode3 != '-' or catdate == '' or moveon == 1:
	continue #moves on to the next record

	# reset ALL the variables... ok, just the ones used for data entry into the file.
	bibLvl = type = isbn = issn = title = author = date = recordNumber = publisher = edition = ''

	# determine if record should be treated as a "monograph" [m] or "serial" [s or i] in terms of what to pull from the record using biblvl field
	bibLvl = record['998']['c']

	# some notes
	# ISBN/ISSN - we're grabbing the first entry of the first 02X field; hence not trying to sort through all the 02X fields in one record (if multiple fields exist)

	if bibLvl in ('m', 'a'):
	type = 'Book'
	if record['020'] is not None:
	isbn = record.isbn()
	if record['250'] is not None:
	edition = record['250']['a']
	if record['100'] is not None:
	author = record.author()
	if record['250'] is not None:
	edition = record['250']['a']
	elif bibLvl in ('s', 'i', 'b'):
	type = 'Journal'
	if record['022'] is not None:
	issn = record['022']['a']

	# author
	if record['100'] is not None:
	author = record.author()
	elif record['110'] is not None:
	author = record['110']['ab']
	elif record['700'] is not None:
	author = record['700']['ab']
	elif record['710'] is not None:
	author = record['710']['ab']

	# title: 229 for serials, 245 for gov docs/others
	if bibLvl == 's':
	if record['229'] is not None:
	title = record['229']['a']
	else:
	title = record.title()
	else:
	if record['245'] is not None:
	title = record.title()
	# nonetype object attribute error forced me to comment the strip below. Gah.
	# title = title.rstrip('/')

	# publisher - since records in our db have pub info in 260 OR 264, I'm pulling this info out manually instead of using record.publisher()
	if record['260'] is not None:
	publisher = record['260']['b']
	date = record.pubyear()
	elif record['264'] is not None:
	publisher = record['264']['b']
	# date = record['264']['c']

	# record number
	if record['907'] is not None:
	recordNumber = "https://cat.lib.grinnell.edu/record=" + record['907']['a'].replace('.', '')[:-1]

	# time to clean up punctuation and initial articles!
	if title is not None:
	title = title.rstrip('/.')
	nonFiling = record['245'].indicators[1]
	nonFiling = int(nonFiling)
	title = title[nonFiling:]
	title = title.encode("utf-8")

	if author is not None:
	author = author.rstrip(',.')
	author = author.encode("utf-8")

	if publisher is not None:
	publisher = publisher.rstrip(';,.')
	publisher = publisher.encode("utf-8")

	if date is not None:
	date = date.rstrip(',.')
	date = date.encode("utf-8")

	if edition is not None:
	edition = edition.rstrip(',.')
	edition = edition.encode("utf-8")

	# order for spreadsheet, what fields correspond to what variables, and what fields are left blank because I'm lazy:
	# title > title
	# type >
	# URL > recordNumber
	# publisher > publisher
	# pubdate > date
	# public note > ''
	# display pubnote > ''
	# location note > ''
	# display locnote > ''
	# ISSN > isn
	# coverage begin > ''
	# coverage end > ''
	# ISBN > isn
	# author > author
	# editor > '' [yes, I know that we are putting editors in the author field. Deal with it or submit a PR.]
	# edition > edition
	# language ID > ''
	# alphabetization > ''

	#csv_out.writerow([isn, title, author, date, recordNumber, publisher, edition, locationdata])
	csv_out.writerow([title, type, recordNumber, publisher, date, '', '', '', '', issn, '', '', isbn, author, '', edition, '', ''])
	fd.close()

	# split out the huge CSV file - 100000 rows per output file
	csv_splitter.split(open('marc_records.csv', 'r'))

	# time to send the email saying that the csv file is ready to download

	textfile = '/home/libadmin/sersoltest/downloadready.txt'
	me = 'server_email_here'
	you = 'your_email_here'

	# Open a plain text file for reading. For this example, assume that
	# the text file contains only ASCII characters.
	fp = open(textfile, 'rb')
	# Create a text/plain message
	msg = MIMEText(fp.read())
	fp.close()

	msg['Subject'] = 'The contents of %s' % textfile
	msg['From'] = me
	msg['To'] = you

	# Send the message via our own SMTP server, but don't include the
	# envelope header.
	s = smtplib.SMTP('server_address')
	s.sendmail(me, [you], msg.as_string())
	s.quit()