# shamrt/journal_list.py

## journal_list.py
# PROGRAM NAME: journal_list.py

# Displays the title of every journal in the JSTOR Early Journal Content Data Bundle.
# Highlights those with "Psych" or "Philos" in the title (+ two general science journals).
# Displays the filename of the first article for each journal.

# Written by Christopher D Green
# January 2014

# Modified by (if you modify this program, put your name here.)
# Date Modified (if you modify this program, put the date of the modification here.)

#START OF PROGRAM
import os #You need to import this in order to read the filenames from the disk below.

#Below is the directory path to where I am keeping the files on my computer.
#You will need to change it to suit your own system.
dirname='/Users/chriso/Desktop/JSTOR-journals/bundle/'

#Journal titles containing any of these fragments are highlighted on screen.
HIGHLIGHT_FRAGMENTS=('Psych', 'Philos', 'Monist', 'Scientific Monthly')
#Journal titles equal to one of these are highlighted too (whole-title match only).
HIGHLIGHT_TITLES=('Science',)


def extract_tag(contents, tag):
    """Return the text between the first <tag> and the next </tag> in contents.

    Returns None when either the opening or the closing tag is missing, so a
    truncated file is skipped instead of yielding a meaningless slice.
    """
    opening = '<' + tag + '>'
    start = contents.find(opening)
    if start == -1:
        return None
    start += len(opening)
    end = contents.find('</' + tag + '>', start)
    if end == -1:
        return None
    return contents[start:end]


def is_highlighted(title):
    """Return True if the journal title is one of those to be highlighted."""
    return title in HIGHLIGHT_TITLES or any(
        fragment in title for fragment in HIGHLIGHT_FRAGMENTS)


def list_journals(directory=None):
    """Print every journal title found in the files of a directory.

    Each title is printed with the name of the file it was found in (minus
    the first 8 characters of that name -- presumably a fixed prefix; verify
    against your copy of the bundle). A title is not printed again while it
    is among the three most recently printed titles. Highlighted titles are
    prefixed with '***' and surrounded by blank lines.

    directory -- folder to scan; defaults to the module-level "dirname".

    Files are visited in sorted name order, because os.listdir() returns
    names in arbitrary order and the three-title memory only works when the
    files of one journal are visited together. Entries that are not regular
    files (and '.DS_Store') are skipped rather than opened.
    """
    if directory is None:
        directory = dirname
    recent = []                                   #the last three titles printed, newest first
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if name == '.DS_Store' or not os.path.isfile(path):
            continue
        #"with" closes the file even if reading or decoding fails
        with open(path, 'r', encoding='utf-8') as handle:
            contents = handle.read()
        title = extract_tag(contents, 'journaltitle')
        if not title or title in recent:          #no (or empty) title, or seen recently
            continue
        if is_highlighted(title):
            print()
            print('***', title, name[8:])
            print()
        else:
            print(title, name[8:])
        recent = [title] + recent[:2]             #move recently-seen titles up the line


if __name__ == '__main__':
    list_journals()


#END OF PROGRAM

## move_psychphil.py
#PROGRAM NAME: move_psychphil.py (ver 1.0)

# Extracts article titles, author names, volume numbers, and page numbers
# for selected Psych, Philos, and science journals (those named in the
# "journals" list below) in the JSTOR EARLY JOURNAL CONTENT DATA BUNDLE.
# Displays these on screen, creates a file name for each full-length article,
# and writes a copy of the article under that name in its own folder.
# Also adds one row per article to a .csv list for its journal
# (Monist.csv, JPPSM.csv, or PhilRev.csv).

# Written by Christopher D Green
# January 2014
# Modified April 2014 to not move files, but just list their original file names in the Excel file.

# START PROGRAM
import os #needed to navigate directory tree of resident harddisk

#set from and to directories
fromdirname='/Users/chriso/Desktop/JSTOR-journals/bundle/'
todirname='/Users/chriso/Desktop/JSTOR-journals/temp/'

#list of desired journals and the abbreviations to be used in filenames
journals=[['The Journal of Philosophy, Psychology and Scientific Methods',
                'The Philosophical Review', 'The Monist'],
                ['JPPSM','PhilRev','Monist']]
#other interesting journals: The American Journal of Psychology, Science,
#     The Scientific Monthly, The Journal of Speculative Philosophy

#column names of the csv lists: one name for each field written per article
CSV_HEADER='filename,year,vol,page,author,title,outfile\n'

#signatures that are replaced by a full name before any further processing
AUTHOR_ALIASES={'P. C.': 'Paul Carus'}


def extract_tag(contents, tag):
    """Return the text between the first <tag> and the next </tag> in contents.

    Returns None when either the opening or the closing tag is missing.
    """
    opening = '<' + tag + '>'
    start = contents.find(opening)
    if start == -1:
        return None
    start += len(opening)
    end = contents.find('</' + tag + '>', start)
    if end == -1:
        return None
    return contents[start:end]


def first_author(contents):
    """Return the name of the first listed author, or '' if there is none.

    The name is the text after the first <list-item> that follows <authors>
    (only whitespace allowed in between), up to the next '<'. Only the first
    author is returned; further authors are ignored.
    """
    block_at = contents.find('<authors>')
    if block_at == -1:
        return ''
    item_at = contents.find('<list-item>', block_at)
    if item_at == -1 or contents[block_at + len('<authors>'):item_at].strip():
        return ''
    name_start = item_at + len('<list-item>')
    name_end = contents.find('<', name_start)     #the < of the closing tag ends the name
    if name_end == -1:
        return ''
    return contents[name_start:name_end]


def tidy_caps(part):
    """Return part in Title Case if it was all CAPS, otherwise unchanged."""
    return part.title() if part.isupper() else part


def reverse_author(author):
    """Return the author's name as 'LastnameFirstname' for use in the csv lists.

    The last word of the name is taken as the last name. Spaces, dots and
    commas are removed (a comma would shift the csv columns). Names shorter
    than 3 characters give 'NoAuthor'.
    This still fails on names with Jr. at the end; correct those by hand.
    """
    if len(author) < 3:
        return 'NoAuthor'
    split_at = author.rfind(' ') + 1              #0 when the name has no space
    combined = tidy_caps(author[split_at:]) + tidy_caps(author[:split_at])
    for unwanted in ' .,':
        combined = combined.replace(unwanted, '')
    return combined


def parse_article(contents):
    """Return the details of a wanted full-length article, or None.

    None is returned when the file has no journal title, when its journal is
    not in journals[0], or when the text after <type> does not start with
    'fla'. Otherwise a dict is returned with the keys journal, abbr, year,
    volume, page, title, author, revauthor and outfile. A missing volume or
    first page is reported as '0'; a missing year, title or author as ''.
    Commas in the article title are replaced with ` to protect the csv lists.
    """
    jtitle = extract_tag(contents, 'journaltitle')
    if jtitle not in journals[0]:                 #also covers files without any title
        return None
    type_at = contents.find('<type>')
    if type_at == -1 or not contents.startswith('fla', type_at + len('<type>')):
        return None
    abbr = journals[1][journals[0].index(jtitle)]

    volnum = extract_tag(contents, 'volume')
    if volnum is None:
        volnum = '0'
    pnum = extract_tag(contents, 'fpage')
    if pnum is None:
        pnum = '0'

    year_at = contents.find('<year>')
    year = contents[year_at + 6:year_at + 10] if year_at != -1 else ''

    atitle = extract_tag(contents, 'title')
    atitle = '' if atitle is None else atitle.replace(',', '`')

    author = first_author(contents)
    author = AUTHOR_ALIASES.get(author, author)

    return {'journal': jtitle, 'abbr': abbr, 'year': year, 'volume': volnum,
            'page': pnum, 'title': atitle, 'author': author,
            'revauthor': reverse_author(author),
            'outfile': abbr + '.' + volnum + '.' + pnum + '.txt'}


def export_articles():
    """Copy every wanted article out of "fromdirname" and list it in a csv file.

    For each regular file in "fromdirname" (visited in sorted name order,
    skipping '.DS_Store') that parse_article() accepts, this prints the
    journal title, article title and author, writes a copy of the file to
    "todirname" under the generated name, and appends one row to
    <abbreviation>.csv in "todirname".

    Every csv list is started afresh, with a header row, at the beginning of
    a run, so running the program twice does not duplicate rows.
    """
    for abbr in journals[1]:
        with open(os.path.join(todirname, abbr + '.csv'), 'w', encoding='utf-8') as listfile:
            listfile.write(CSV_HEADER)

    for name in sorted(os.listdir(fromdirname)):
        path = os.path.join(fromdirname, name)
        if name == '.DS_Store' or not os.path.isfile(path):
            continue
        with open(path, 'r', encoding='utf-8') as infile:
            contents = infile.read()
        article = parse_article(contents)
        if article is None:
            continue

        print(article['journal'])
        print(article['title'])
        print(article['author'])
        print()

        #write a copy of the article to the new directory
        with open(os.path.join(todirname, article['outfile']), 'w', encoding='utf-8') as copy:
            copy.write(contents)

        row = ','.join([name, article['year'], article['volume'], article['page'],
                        article['revauthor'], article['title'], article['outfile']])
        with open(os.path.join(todirname, article['abbr'] + '.csv'), 'a', encoding='utf-8') as listfile:
            listfile.write(row + '\n')


if __name__ == '__main__':
    export_articles()


# END PROGRAM
	# PROGRAM NAME: journal_list.py

	# Displays the title of every journal in the JSTOR Early Journal Content Data Bundle.
	# Highlights those with "Psych" or "Philos" in the title (+ two general science journals).
	# Displays the filename of the first article for each journal.

	# Written by Christopher D Green
	# January 2014

	# Modified by (if you modify this program, put your name here.)
	# Date Modified (if you modify this program, put the date of the modification here.)

	#START OF PROGRAM
import os #You need to import this in order to read the filenames from the disk below.

#Below is the directory path to where I am keeping the files on my computer.
#You will need to change it to suit your own system.
dirname='/Users/chriso/Desktop/JSTOR-journals/bundle/'

#Journal titles containing any of these fragments are highlighted on screen.
HIGHLIGHT_FRAGMENTS=('Psych', 'Philos', 'Monist', 'Scientific Monthly')
#Journal titles equal to one of these are highlighted too (whole-title match only).
HIGHLIGHT_TITLES=('Science',)


def extract_tag(contents, tag):
    """Return the text between the first <tag> and the next </tag> in contents.

    Returns None when either the opening or the closing tag is missing, so a
    truncated file is skipped instead of yielding a meaningless slice.
    """
    opening = '<' + tag + '>'
    start = contents.find(opening)
    if start == -1:
        return None
    start += len(opening)
    end = contents.find('</' + tag + '>', start)
    if end == -1:
        return None
    return contents[start:end]


def is_highlighted(title):
    """Return True if the journal title is one of those to be highlighted."""
    return title in HIGHLIGHT_TITLES or any(
        fragment in title for fragment in HIGHLIGHT_FRAGMENTS)


def list_journals(directory=None):
    """Print every journal title found in the files of a directory.

    Each title is printed with the name of the file it was found in (minus
    the first 8 characters of that name -- presumably a fixed prefix; verify
    against your copy of the bundle). A title is not printed again while it
    is among the three most recently printed titles. Highlighted titles are
    prefixed with '***' and surrounded by blank lines.

    directory -- folder to scan; defaults to the module-level "dirname".

    Files are visited in sorted name order, because os.listdir() returns
    names in arbitrary order and the three-title memory only works when the
    files of one journal are visited together. Entries that are not regular
    files (and '.DS_Store') are skipped rather than opened.
    """
    if directory is None:
        directory = dirname
    recent = []                                   #the last three titles printed, newest first
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if name == '.DS_Store' or not os.path.isfile(path):
            continue
        #"with" closes the file even if reading or decoding fails
        with open(path, 'r', encoding='utf-8') as handle:
            contents = handle.read()
        title = extract_tag(contents, 'journaltitle')
        if not title or title in recent:          #no (or empty) title, or seen recently
            continue
        if is_highlighted(title):
            print()
            print('***', title, name[8:])
            print()
        else:
            print(title, name[8:])
        recent = [title] + recent[:2]             #move recently-seen titles up the line


if __name__ == '__main__':
    list_journals()


	#END OF PROGRAM
	#PROGRAM NAME: move_psychphil.py (ver 1.0)

	# Extracts article titles, author names, volume numbers, and page numbers
	# for selected Psych, Philos, and science journals (those named in the
	# "journals" list below) in the JSTOR EARLY JOURNAL CONTENT DATA BUNDLE.
	# Displays these on screen, creates a file name for each full-length article,
	# and writes a copy of the article under that name in its own folder.
	# Also adds one row per article to a .csv list for its journal
	# (Monist.csv, JPPSM.csv, or PhilRev.csv).

	# Written by Christopher D Green
	# January 2014
	# Modified April 2014 to not move files, but just list their original file names in the Excel file.

	# START PROGRAM
import os #needed to navigate directory tree of resident harddisk

#set from and to directories
fromdirname='/Users/chriso/Desktop/JSTOR-journals/bundle/'
todirname='/Users/chriso/Desktop/JSTOR-journals/temp/'

#list of desired journals and the abbreviations to be used in filenames
journals=[['The Journal of Philosophy, Psychology and Scientific Methods',
                'The Philosophical Review', 'The Monist'],
                ['JPPSM','PhilRev','Monist']]
#other interesting journals: The American Journal of Psychology, Science,
#     The Scientific Monthly, The Journal of Speculative Philosophy

#column names of the csv lists: one name for each field written per article
CSV_HEADER='filename,year,vol,page,author,title,outfile\n'

#signatures that are replaced by a full name before any further processing
AUTHOR_ALIASES={'P. C.': 'Paul Carus'}


def extract_tag(contents, tag):
    """Return the text between the first <tag> and the next </tag> in contents.

    Returns None when either the opening or the closing tag is missing.
    """
    opening = '<' + tag + '>'
    start = contents.find(opening)
    if start == -1:
        return None
    start += len(opening)
    end = contents.find('</' + tag + '>', start)
    if end == -1:
        return None
    return contents[start:end]


def first_author(contents):
    """Return the name of the first listed author, or '' if there is none.

    The name is the text after the first <list-item> that follows <authors>
    (only whitespace allowed in between), up to the next '<'. Only the first
    author is returned; further authors are ignored.
    """
    block_at = contents.find('<authors>')
    if block_at == -1:
        return ''
    item_at = contents.find('<list-item>', block_at)
    if item_at == -1 or contents[block_at + len('<authors>'):item_at].strip():
        return ''
    name_start = item_at + len('<list-item>')
    name_end = contents.find('<', name_start)     #the < of the closing tag ends the name
    if name_end == -1:
        return ''
    return contents[name_start:name_end]


def tidy_caps(part):
    """Return part in Title Case if it was all CAPS, otherwise unchanged."""
    return part.title() if part.isupper() else part


def reverse_author(author):
    """Return the author's name as 'LastnameFirstname' for use in the csv lists.

    The last word of the name is taken as the last name. Spaces, dots and
    commas are removed (a comma would shift the csv columns). Names shorter
    than 3 characters give 'NoAuthor'.
    This still fails on names with Jr. at the end; correct those by hand.
    """
    if len(author) < 3:
        return 'NoAuthor'
    split_at = author.rfind(' ') + 1              #0 when the name has no space
    combined = tidy_caps(author[split_at:]) + tidy_caps(author[:split_at])
    for unwanted in ' .,':
        combined = combined.replace(unwanted, '')
    return combined


def parse_article(contents):
    """Return the details of a wanted full-length article, or None.

    None is returned when the file has no journal title, when its journal is
    not in journals[0], or when the text after <type> does not start with
    'fla'. Otherwise a dict is returned with the keys journal, abbr, year,
    volume, page, title, author, revauthor and outfile. A missing volume or
    first page is reported as '0'; a missing year, title or author as ''.
    Commas in the article title are replaced with ` to protect the csv lists.
    """
    jtitle = extract_tag(contents, 'journaltitle')
    if jtitle not in journals[0]:                 #also covers files without any title
        return None
    type_at = contents.find('<type>')
    if type_at == -1 or not contents.startswith('fla', type_at + len('<type>')):
        return None
    abbr = journals[1][journals[0].index(jtitle)]

    volnum = extract_tag(contents, 'volume')
    if volnum is None:
        volnum = '0'
    pnum = extract_tag(contents, 'fpage')
    if pnum is None:
        pnum = '0'

    year_at = contents.find('<year>')
    year = contents[year_at + 6:year_at + 10] if year_at != -1 else ''

    atitle = extract_tag(contents, 'title')
    atitle = '' if atitle is None else atitle.replace(',', '`')

    author = first_author(contents)
    author = AUTHOR_ALIASES.get(author, author)

    return {'journal': jtitle, 'abbr': abbr, 'year': year, 'volume': volnum,
            'page': pnum, 'title': atitle, 'author': author,
            'revauthor': reverse_author(author),
            'outfile': abbr + '.' + volnum + '.' + pnum + '.txt'}


def export_articles():
    """Copy every wanted article out of "fromdirname" and list it in a csv file.

    For each regular file in "fromdirname" (visited in sorted name order,
    skipping '.DS_Store') that parse_article() accepts, this prints the
    journal title, article title and author, writes a copy of the file to
    "todirname" under the generated name, and appends one row to
    <abbreviation>.csv in "todirname".

    Every csv list is started afresh, with a header row, at the beginning of
    a run, so running the program twice does not duplicate rows.
    """
    for abbr in journals[1]:
        with open(os.path.join(todirname, abbr + '.csv'), 'w', encoding='utf-8') as listfile:
            listfile.write(CSV_HEADER)

    for name in sorted(os.listdir(fromdirname)):
        path = os.path.join(fromdirname, name)
        if name == '.DS_Store' or not os.path.isfile(path):
            continue
        with open(path, 'r', encoding='utf-8') as infile:
            contents = infile.read()
        article = parse_article(contents)
        if article is None:
            continue

        print(article['journal'])
        print(article['title'])
        print(article['author'])
        print()

        #write a copy of the article to the new directory
        with open(os.path.join(todirname, article['outfile']), 'w', encoding='utf-8') as copy:
            copy.write(contents)

        row = ','.join([name, article['year'], article['volume'], article['page'],
                        article['revauthor'], article['title'], article['outfile']])
        with open(os.path.join(todirname, article['abbr'] + '.csv'), 'a', encoding='utf-8') as listfile:
            listfile.write(row + '\n')


if __name__ == '__main__':
    export_articles()



	# END PROGRAM