Skip to content

Instantly share code, notes, and snippets.

@yuxincs
Last active April 28, 2019 19:33
Show Gist options
  • Save yuxincs/c4b7a7da90566f4b9b237f144d0afe55 to your computer and use it in GitHub Desktop.
Save yuxincs/c4b7a7da90566f4b9b237f144d0afe55 to your computer and use it in GitHub Desktop.
ArXiv Cleaner
import argparse
import os
import re
import logging
import shutil
logging.basicConfig(format='%(levelname)s: %(message)s')
_comment = re.compile(r'\\begin\{comment\}[\s\S]*\\end{comment}|(?<!\\)%.*\n')
_empty_line = re.compile(r'\n\s*\n')
_graphics = re.compile(r'\\includegraphics(\[.*\]|)\{(.*)\}')
_tex = re.compile(r'\\input\{(.*)\}')
def _find_figure_extension(folder_path, figure):
    """Return the suffix that makes *figure* name an existing file.

    Returns '' when the path exists as written, or when no candidate is found
    (the caller then fails on the original path, as before).
    """
    if os.path.exists(os.path.join(folder_path, figure)):
        return ''
    # NOTE(review): common image types only; this approximates, but does not
    # replicate, the extension search done by the graphicx package.
    for extension in ('.pdf', '.png', '.jpg', '.jpeg', '.eps'):
        if os.path.exists(os.path.join(folder_path, figure + extension)):
            return extension
    return ''


def _flatten_figure_path(match):
    """Rewrite only the path group of an \\includegraphics match ('/' -> '-')."""
    whole = match.group(0)
    start = match.start(2) - match.start(0)
    end = match.end(2) - match.start(0)
    return whole[:start] + whole[start:end].replace('/', '-') + whole[end:]


def clean_file(folder_path, file, visited):
    """Write a cleaned copy of a TeX file, its figures and its inputs to 'arxiv'.

    The file is read from ``folder_path``, comments are stripped, runs of
    blank lines are collapsed, every figure referenced by ``\\includegraphics``
    is copied into ``<folder_path>/arxiv`` under a flattened name ('/'
    replaced by '-'), and the references are rewritten to match. Files pulled
    in through ``\\input`` are processed recursively.

    Args:
        folder_path: Root folder of the LaTeX project.
        file: Path of the TeX file to clean, relative to ``folder_path``.
        visited: Set of relative TeX paths already processed; updated in
            place and used to avoid cleaning the same file twice.

    Raises:
        OSError: If the TeX file or a referenced figure cannot be read or
            copied.
    """
    arxiv_folder = os.path.join(folder_path, 'arxiv')
    os.makedirs(arxiv_folder, exist_ok=True)

    # Read everything up front so the handle is not held open while recursing.
    with open(os.path.join(folder_path, file), 'r', encoding='utf-8') as f:
        content = f.read()

    # remove comments
    content = _comment.sub('', content)
    content = _empty_line.sub('\n\n', content)

    # copy figures
    for _, figure in _graphics.findall(content):
        # LaTeX allows the extension to be omitted; locate the real file.
        extension = _find_figure_extension(folder_path, figure)
        shutil.copy(
            os.path.join(folder_path, figure + extension),
            os.path.join(arxiv_folder, figure.replace('/', '-') + extension))

    # Point the references at the flattened names. Only the path is touched,
    # so a '/' inside the optional arguments is left alone.
    content = _graphics.sub(_flatten_figure_path, content)

    visited.add(file)
    for ref_tex in _tex.findall(content):
        if not ref_tex.endswith('.tex'):
            ref_tex = '{}.tex'.format(ref_tex)
        if ref_tex not in visited:
            # The recursive call records ref_tex in `visited` itself.
            clean_file(folder_path, ref_tex, visited)

    # \input paths are kept as-is, so a file in a sub-folder needs that
    # sub-folder to exist inside the output folder before it is written.
    output_path = os.path.join(arxiv_folder, file)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as write_f:
        write_f.write(content)
def main(folder, main_tex):
    """Build a fresh 'arxiv' folder inside *folder* from the cleaned sources.

    Any existing ``<folder>/arxiv`` is deleted and recreated. The main TeX
    file and everything it pulls in are cleaned via ``clean_file``, then the
    top-level auxiliary files (.bbl, .cls, .bst, .sty) are copied over.
    Warnings are logged for top-level .tex files that were never referenced
    and when no .bbl file ends up in the output folder.

    Args:
        folder: Path to the LaTeX project folder.
        main_tex: Name of the main TeX file, relative to ``folder``.
    """
    folder_path = os.path.abspath(folder)
    arxiv_folder = os.path.join(folder_path, 'arxiv')

    # Always start from an empty output folder.
    if os.path.exists(arxiv_folder):
        shutil.rmtree(arxiv_folder)
    os.mkdir(arxiv_folder)

    visited = set()
    clean_file(folder_path, main_tex, visited)

    for file in os.listdir(folder_path):
        source_path = os.path.join(folder_path, file)
        # Only regular files are reported or copied; this skips directories
        # such as the output folder itself.
        if not os.path.isfile(source_path):
            continue
        # report not used tex files
        if file.endswith('.tex') and file not in visited:
            logging.warning('{} not used, ignored.'.format(file))
        # copy auxiliary files
        if file.endswith(('.bbl', '.cls', '.bst', '.sty')):
            shutil.copy(source_path, os.path.join(arxiv_folder, file))

    # check if bbl file is present
    if not any(name.endswith('.bbl') for name in os.listdir(arxiv_folder)):
        logging.warning('no .bbl file is copied, forgot to run `bibtex`? Ignore this if you do not use `bibtex`')
if __name__ == '__main__':
    # Command-line entry point: one positional project folder plus an
    # optional name for the main TeX file.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'folder',
        metavar='FOLDER',
        type=str,
        nargs=1,
        help='The precompiled latex folder.',
    )
    parser.add_argument(
        '-m',
        '--main',
        type=str,
        required=False,
        default='main.tex',
        help='The main tex file.',
    )
    options = parser.parse_args()
    # nargs=1 yields a one-element list; unpack its single entry.
    (project_folder,) = options.folder
    main(project_folder, options.main)
@yuxincs
Copy link
Author

yuxincs commented Apr 28, 2019

This script removes all comments from your TeX source files, including those written with the comment package (\begin{comment}xx\end{comment}), moves your figures into the root folder, and renames them accordingly.

Make sure you have already generated the .bbl file if you use bibtex.
Run python arxiv_cleaner.py <your_tex_source_folder> and the script will generate an arxiv folder inside <your_tex_source_folder> containing all files necessary to upload to arXiv.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment