Skip to content

Instantly share code, notes, and snippets.

@keflavich
Last active April 19, 2023 21:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save keflavich/b7762ef34f5b5d715590f0d92ad2137c to your computer and use it in GitHub Desktop.
Save keflavich/b7762ef34f5b5d715590f0d92ad2137c to your computer and use it in GitHub Desktop.
paper2arxiv
from __future__ import print_function
import pdb
import re,os,time
import argparse
import shutil
from six import string_types as basestring
from os.path import join
from astropy import log
log.setLevel('DEBUG')
parser = argparse.ArgumentParser()
parser.add_argument("--reconvert",default=False,action='store_true')
#parser.add_argument("--arxiv",default=True,action='store_true')
parser.add_argument("--apj",default=False,action='store_true')
parser.add_argument("--texit",default=False,action='store_true')
parser.add_argument("--bibit",default=True)
args = parser.parse_args()
print("ARGS: ",args)
ppath='.'
paper_name = 'sample63'
file = open(os.path.join(ppath,paper_name+'.tex'),'r')
figlistfile = open(os.path.join(ppath, 'figure_list.txt'), 'w')
full_figure_list = []
isarxiv = not args.apj
outdir = "apj/" if not isarxiv else "arxiv/"
outtype = "apj" if not isarxiv else "arxiv"
print("Converting package to {0} format".format(outtype))
if not os.path.exists(join(ppath,outdir)):
os.mkdir(join(ppath,outdir))
#shutil.copy('aamacros.tex', outdir)
#shutil.copy('aa.cls', outdir)
#shutil.copy('aa.bst', outdir)
#shutil.copy('apjmacros.tex', outdir)
#shutil.copy('macros.tex', outdir)
#shutil.copy('emulateapj.cls', outdir)
shutil.copy('aastex63.cls', outdir)
#shutil.copy('aasjournal.bst', outdir)
outfn = join(ppath,outtype+'form_temp.tex')
print("Creating temporary file: {0}".format(outfn))
outf = open(outfn,'w')
inputre = re.compile('input{(.*)}')
includere = re.compile('include{(.*)}')
bibre = re.compile('bibliography{(.*)}')
aandare = re.compile("documentclass{aa}")
# \documentclass{aa}
beginre = re.compile(r'\\begin{document}')
endre = re.compile(r'\\end{document}')
prefacere = re.compile(r'\\input{preface.*}')
solobibre = re.compile(r"\\input{(solobib)(.tex)?}")
def strip_input(list_of_lines):
# strip out preface, solobib, end{doc}
#return "".join(list_of_lines[1:-2])
return "".join(
[line
for line in list_of_lines
if not prefacere.search(line)
#and not solobibre.search(line)
and not endre.search(line)
and not beginre.search(line)
]
)
def dobib(bib, outf):
#bn = bib.groups()[0] + '.bbl'
bn = paper_name+".bbl"
print("Doing bibliography " + bn)
with open(join(ppath,bn),'r') as f:
print(strip_input(f.readlines()), end='', file=outf)
spacecount=0
for ii,line in enumerate(file.readlines()):
if line[0] == "%":
continue
elif line[0] == " ":
spacecount += 1
input = inputre.search(line)
include = includere.search(line)
bib = bibre.search(line)
solobib = solobibre.search(line)
if solobib is not None:
fn = solobib.groups()[0] + ".tex"
print(ii, "Doing solobib " + fn)
with open(os.path.join(ppath,fn),'r') as f:
solobib = f.readlines()
for ln in solobib:
if ln[0].strip() == "%":
continue
bib = bibre.search(ln)
if bib is not None:
print(ii,"Bib matched: ",bib.groups())
dobib(bib,outf)
elif input is not None:
fn = os.path.splitext(input.groups()[0])[0] + ".tex"
if fn.count('.') > 1:
raise ValueError(f"Too many dots in filename: {fn}")
print(ii, "Doing input " + fn)
with open(os.path.join(ppath,fn),'r') as f:
if 'preface' in line:
print(f.read(), end='', file=outf)
else:
print(strip_input(f.readlines()), end='', file=outf)
elif include is not None:
fn = os.path.splitext(include.groups()[0])[0] + ".tex"
print(ii, "Doing include " + fn)
f = open(os.path.join(ppath,fn),'r')
if 'preface' in line:
print(f.read(), end='', file=outf)
else:
print(strip_input(f.readlines()), end='', file=outf)
f.close()
elif bib is not None:
print(ii, "Doing bib (no solo) " + bib.groups()[0])
dobib(bib,outf)
else:
print(line, end="", file=outf, sep="")
print("Spacecount = {0}".format(spacecount))
outf.close()
file.close()
file = open(outfn,'r')
outfn = join(ppath, outtype+'form.tex')
outf = open(outfn,'w')
default_suffix='.pdf'
figre = re.compile('Figure{{?(.*?)}?}')
#figre = re.compile('Figure\n?{{?(.*?)}?}?',flags=re.MULTILINE)
figre1line = re.compile('^\\\Figure',)
fig2re = re.compile('R?o?t?FigureTwoA?A?\n?{{?(.*?)}?}\n?\s*{(.*?)}?}',flags=re.MULTILINE)
fig2re1line = re.compile('^\\\R?o?t?FigureTwoA?A?\n?({{?(.*?)}?})?', flags=re.MULTILINE)
fig3re = re.compile('R?o?t?FigureThreeA?A?\n?{{?(.*?)}?}\n?\s*{(.*?)}?}',flags=re.MULTILINE)
fig3re1line = re.compile('^\\\R?o?t?FigureThreeA?A?\n?({{?(.*?)}?})?', flags=re.MULTILINE)
fig4re = re.compile('FigureFourP?D?F?\n?\s*{{?(.*?)}?}\n\s*?{{?(.*?)}}?\n\s*?{{?(.*?)}}?\n\s*?{{?(.*?)}}?',flags=re.MULTILINE)
fig4re1line = re.compile('FigureFourP?D?F?({{?(.*?)}?})?',flags=re.MULTILINE)
fig1colre = re.compile('FigureOneCol{{?(.*?)}?}')
#figre = re.compile('Figure\n?{{?(.*?)}?}?',flags=re.MULTILINE)
fig1colre1line = re.compile('^\\\FigureOneCol',)
plotonere = re.compile('plotone{{?(.*?)}')
includegre = re.compile('includegraphics\[.*\]{{?(.*?)}?}')
fig_suffixes = "png|eps|pdf"
lonelygraphics = re.compile('^\s*{{?(.*?(%s)?)}?}' % fig_suffixes)
out_suffix = 'png'
count = 1
for ii,line in enumerate(file.readlines()):
aanda = aandare.search(line)
if line[0] == "%":
continue
#elif aanda is not None:
# print "AandA -> aastex"
# print >>outf,"\\documentclass[12pt,preprint]{aastex}",
# f.close()
# continue
fig1b = figre1line.search(line)
fig1 = figre.search(line)
fig1colb = fig1colre1line.search(line)
fig1col = fig1colre.search(line)
fig2 = fig2re.search(line)
fig2b = fig2re1line.search(line)
fig3 = fig3re.search(line)
fig3b = fig3re1line.search(line)
fig4 = fig4re.search(line)
fig4b = fig4re1line.search(line)
pone = plotonere.search(line)
lonely = lonelygraphics.search(line)
igr = includegre.search(line)
if igr is not None:
fign = igr.groups()[0]
prevline = ''
elif fig1 is not None:
fign = fig1.groups()[0]
prevline = ''
elif fig1col is not None:
fign = fig1col.groups()[0]
prevline = ''
elif fig2 is not None:
fign = fig2.groups()[0:2]
prevline = ''
elif fig3 is not None:
fign = fig3.groups()[0:3]
prevline = ''
elif fig4 is not None:
fign = fig4.groups()[0:4]
prevline = ''
elif fig4b is not None:
fign = [n for n in fig4b.groups() if n is not None]
nfigs = len(fign)
prevline = 'fig4'
elif fig2b is not None:
fign = fig2b.groups()[1]
if fign in (None,''):
fign = None
nfigs = 0
else:
nfigs = 1
prevline = 'fig2'
elif fig3b is not None:
fign = fig3b.groups()[1]
if fign in (None,''):
fign = None
nfigs = 0
else:
nfigs = 1
prevline = 'fig3'
elif pone is not None:
fign = pone.groups()[0]
prevline = ''
elif fig1b is not None:
if 'Two' in line:
print("Two in line: ",line)
raise ValueError(f"The word 'Two' is in the line {line}")
#print "Found solo figure"
fign = None#fig1b.groups()[0]
nfigs = 0
prevline = 'fig1'
elif fig1colb is not None:
fign = None
nfigs = 0
prevline = 'fig1'
elif lonely is not None and prevline in ('fig4','fig2','fig3','fig1'):
# DEBUG print "in lonely: ",lonely.groups()," nfigs=",nfigs," prevline=",prevline
fign = lonely.groups()[0]
nfigs += 1
if nfigs >= int(prevline[-1]):
prevline = ''
elif lonely is not None:
pass
#print "found a lonely non-figure: ",lonely.group(), lonely.groups()
else:
fign=None
#if 'prevline' in locals() and prevline != '':
# print "{1}: Found prevline={0}, setting to ''".format(prevline, line.strip())
prevline = ''
if fign:
if fign[-3:] in ('pdf', 'png'):
figlistfile.write('{0}\n'.format(fign))
full_figure_list.append(fign)
else:
figlistfile.write('{0}.pdf\n'.format(fign))
full_figure_list.append(fign+".pdf")
## special case for lines that contain .'s specifically for w51_alma_00308
#if "}.pdf}" in line:
# line = line.replace("}.pdf}", "}.png}")
if line.strip() == "\FigureTwoAA":
assert prevline == 'fig2'
input = inputre.search(line)
if fign is not None:
#DEBUG print "Found fign: %s" % fign
print(f"Found fign: {fign}")
if fign in ("#1","#2","#3","#4"):
print(line, end="", file=outf)
continue
if len(fign) == 1 or isinstance(fign,basestring):
figlist = [fign]
else:
figlist = fign
print("Figlist: ",figlist)
outline = line
for fign in figlist:
fignroot = fign
log.debug("fign: {0}".format(fign))
if fign[-4] != '.': # if no suffix, add one
log.debug("Adding default suffix {0} to file {1}".format(default_suffix, fign))
fign += default_suffix
insuffix = default_suffix.lstrip(".")
else:
insuffix = os.path.splitext(fign)[1].lstrip(".")
fn = join(ppath, fign)
#if fign[-3:] == "png":
# #if args.reconvert:
# # os.system("pngtoeps %s" % fign)
# log.debug("Changing png to new suffix {0}".format(out_suffix))
# fn = join(ppath,fign.replace("png",out_suffix))
#elif fign[-3:] == "svg":
# #if args.reconvert:
# # os.system("svg2eps %s" % fign)
# log.debug("Changing svg to new suffix {0}".format(out_suffix))
# fn = join(ppath, fign.replace("svg",out_suffix))
#elif fign[-3:] == "pdf":
# #if args.reconvert:
# # os.system("pdf2ps %s %s" % (fign,fign.replace("pdf","ps")))
# # os.system("mv %s %s" % (fign.replace('pdf','ps'),fign.replace('pdf','eps')))
# log.debug("Changing pdf to new suffix {0}".format(out_suffix))
# fn = join(ppath, fign.replace("pdf",out_suffix))
#elif fign[-3:] != out_suffix:
# nroot = os.path.splitext(fign)[0]
# log.debug("Changing {1} to new suffix {0}".format(out_suffix, nroot))
# fn = os.path.join(ppath,nroot+"."+out_suffix)
#elif fign[-3:] == "tex":
# raise TypeError("Figure is a text file? %s" % fign)
#else:
# fn = join(ppath,fign)
if not os.path.exists(fn):
raise IOError("File {0} does not exist; maybe you need to make a PDF first? "
"(gs apparently can't convert pngs to pdfs: http://stackoverflow.com/questions/20483600/convert-png-to-pdf-using-ghostscript"
.format(fn))
print("Converting figure " + fn + " to f%i.%s" % (count,out_suffix))
outfig = 'f%i.%s' % (count,out_suffix)
outpath = os.path.join(ppath, outdir, outfig)
if isarxiv and insuffix != out_suffix:
gscmd = 'gs -dSAFER -dBATCH -dNOPAUSE -dAutoRotatePages=/None -dPDFSETTINGS=/screen -sDEVICE=pdfwrite -sOutputFile={1} {0}'.format(fn, outpath)
rslt = os.system(gscmd)
if rslt != 0:
raise ValueError(f"Ghostscript failed for command {gscmd}")
else:
try:
shutil.copy(fn, outpath)
except Exception as ex:
raise ex
if isarxiv:
outline = outline.replace(fignroot,os.path.splitext(outfig)[0])
else:
#print(outline.replace(fignroot,outfig), " vs ", outline.replace(fignroot,os.path.splitext(outfig)[0]))
outline = outline.replace(fignroot,outfig)
count += 1
print(outline, end="", file=outf)
elif input is not None:
print(f"Processing input line {input} from line {line}")
fn = os.path.splitext(input.groups()[0])[0] + ".tex"
print("Doing input " + fn)
f = open(os.path.join(ppath,fn),'r')
if 'preface' in line:
print(f.read(), end="", file=outf)
else:
print(strip_input(f.readlines()), end="", file=outf)
f.close()
elif 'figures/' in line and 'graphicspath' not in line:
raise ValueError(f"Found a line '{line}' with figures/ in it, but it wasn't converted.")
else:
print(line, end="", file=outf)
outf.close()
file.close()
figlistfile.close()
if os.path.exists('figures_to_upload'):
shutil.rmtree('figures_to_upload')
os.mkdir('figures_to_upload')
for fn in set(full_figure_list):
try:
os.link(fn, 'figures_to_upload/{0}'.format(os.path.split(fn)[-1]))
except Exception as ex:
print(ex)
os.chdir(ppath)
os.system('cp %s %s/ms.tex' % (outfn,outdir))
if os.path.exists(outdir+'/Makefile'):
os.chdir(outdir)
os.system('make')
os.system('rm ms.dvi')
os.system('rm ms.ps')
os.system('rm ms.aux')
os.system('rm ms.log')
os.chdir(ppath)
os.system('mv %s/ms.pdf %s_draft%s.pdf' % (outdir,paper_name,time.strftime("%m%d",time.localtime())))
if args.texit:
os.chdir(outdir)
os.system('pdflatex ms.tex')
os.system('pdflatex ms.tex')
if args.bibit:
os.system('bibtex ms')
os.system('bibtex ms')
os.system('pdflatex ms.tex')
for junk in 'dvi','ps','aux','log':
try:
os.system('rm ms.'+junk)
except IOError:
pass
os.chdir(ppath)
if isarxiv:
os.system('mv %s/ms.pdf %s_draft%s_arxiv.pdf' % (outdir,paper_name,time.strftime("%m%d",time.localtime())))
else:
os.system('mv %s/ms.pdf %s_draft%s_aanda.pdf' % (outdir,paper_name,time.strftime("%m%d",time.localtime())))
os.system('tar --exclude Makefile -czf Jeff_SgrB2DS2023_%s_%s.tar.gz %s/ ' % (time.strftime("%m%d",time.localtime()),outtype,outdir))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment