Last active
April 28, 2019 19:33
-
-
Save yuxincs/c4b7a7da90566f4b9b237f144d0afe55 to your computer and use it in GitHub Desktop.
ArXiv Cleaner
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import os | |
import re | |
import logging | |
import shutil | |
logging.basicConfig(format='%(levelname)s: %(message)s')

# Matches either a whole ``comment`` environment or a ``%`` line comment
# (including its trailing newline).  The environment body is matched lazily
# so that two separate comment environments do not swallow the text that
# sits between them.  A ``%`` preceded by a backslash is skipped.
_comment = re.compile(r'\\begin\{comment\}[\s\S]*?\\end\{comment\}|(?<!\\)%.*\n')
# Matches two newlines separated only by whitespace, i.e. a run of blank lines.
_empty_line = re.compile(r'\n\s*\n')
# Group 1: the optional ``[...]`` options (empty string when absent);
# group 2: the figure path.  Negated character classes are used instead of
# a greedy ``.*`` so several commands on one line are matched separately.
_graphics = re.compile(r'\\includegraphics(\[[^\]]*\]|)\{([^}]*)\}')
# Group 1: the argument of ``\input{...}``.
_tex = re.compile(r'\\input\{([^}]*)\}')
def clean_file(folder_path, file, visited):
    """Write a comment-free copy of one tex file, and its figures, to ``arxiv``.

    The file is read from ``folder_path``, comments are removed, runs of
    blank lines are collapsed, every figure named by ``\\includegraphics``
    is copied into ``<folder_path>/arxiv`` under a flattened name (``/``
    replaced by ``-``) and the references are rewritten to match.  Files
    pulled in through ``\\input`` are processed recursively.

    :param folder_path: folder that contains the tex sources.
    :param file: path of the tex file to clean, relative to ``folder_path``.
    :param visited: set of already processed files; updated in place.
    :raises OSError: if the tex file or a referenced figure cannot be read.
        Figure paths are used exactly as written in the source, so they
        must name an existing file.
    """
    arxiv_folder = os.path.join(folder_path, 'arxiv')
    # Make sure the destination exists before anything is copied into it.
    os.makedirs(arxiv_folder, exist_ok=True)

    with open(os.path.join(folder_path, file), 'r') as f:
        content = f.read()

    # remove comments
    content = _comment.sub('', content)
    content = _empty_line.sub('\n\n', content)

    # copy figures
    for _, figure in _graphics.findall(content):
        shutil.copy(os.path.join(folder_path, figure),
                    os.path.join(arxiv_folder, figure.replace('/', '-')))

    # Rename the references to the flattened figure names.  Only the path
    # (group 2) is rewritten; the ``[...]`` options (group 1) are kept
    # verbatim so a ``/`` inside them is not mangled.
    content = _graphics.sub(
        lambda match: '\\includegraphics{}{{{}}}'.format(
            match.group(1), match.group(2).replace('/', '-')),
        content)

    # Mark this file before recursing so circular \input chains terminate.
    visited.add(file)
    for ref_tex in _tex.findall(content):
        if not ref_tex.endswith('.tex'):
            ref_tex = '{}.tex'.format(ref_tex)
        if ref_tex not in visited:
            # The recursive call records ref_tex in ``visited`` itself.
            clean_file(folder_path, ref_tex, visited)

    target = os.path.join(arxiv_folder, file)
    # ``file`` may live in a sub-directory (e.g. ``sections/intro.tex``);
    # create it under ``arxiv`` so the write below does not fail.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'w') as write_f:
        write_f.write(content)
def main(folder, main_tex):
    """Build a fresh ``arxiv`` sub-folder of ``folder`` from its tex sources.

    Any existing ``arxiv`` folder is deleted and recreated, the main tex
    file (and everything it inputs) is cleaned into it, and auxiliary
    ``.bbl`` / ``.cls`` / ``.bst`` / ``.sty`` files found directly in
    ``folder`` are copied alongside.  Warnings are logged for top-level
    ``.tex`` files that were never visited and when no ``.bbl`` file ended
    up in the output.

    :param folder: path of the latex source folder.
    :param main_tex: name of the main tex file, relative to ``folder``.
    """
    folder_path = os.path.abspath(folder)
    arxiv_folder = os.path.join(folder_path, 'arxiv')
    if os.path.exists(arxiv_folder):
        shutil.rmtree(arxiv_folder)
    os.mkdir(arxiv_folder)

    visited = set()
    clean_file(folder_path, main_tex, visited)

    auxiliary_suffixes = ('.bbl', '.cls', '.bst', '.sty')
    for file in os.listdir(folder_path):
        source = os.path.join(folder_path, file)
        # Only regular files are of interest; this also skips ``arxiv``.
        if not os.path.isfile(source):
            continue
        # report not used tex files
        if file.endswith('.tex') and file not in visited:
            logging.warning('%s not used, ignored.', file)
        # copy auxiliary files
        if file.endswith(auxiliary_suffixes):
            shutil.copy(source, os.path.join(arxiv_folder, file))

    # check if bbl file is present
    if not any(name.endswith('.bbl') for name in os.listdir(arxiv_folder)):
        logging.warning('no .bbl file is copied, forgot to run `bibtex`? Ignore this if you do not use `bibtex`')
if __name__ == '__main__':
    # Command-line entry point: one positional source folder plus an
    # optional name for the main tex file.
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        'folder',
        metavar='FOLDER',
        type=str,
        nargs=1,
        help='The precompiled latex folder.',
    )
    cli.add_argument(
        '-m', '--main',
        type=str,
        required=False,
        default='main.tex',
        help='The main tex file.',
    )
    options = cli.parse_args()
    # nargs=1 yields a one-element list, so unpack the single folder value.
    (source_folder,) = options.folder
    main(source_folder, options.main)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script cleans up all comments in your tex source files, including those using the `comment` package (`\begin{comment}xx\end{comment}`), moves your figures out to the root folder and renames them properly.

Make sure you have the `.bbl` file already generated if you use `bibtex`.

Run `python arxiv_cleaner.py <your_tex_source_folder>` and the script will generate an `arxiv` folder inside `<your_tex_source_folder>` containing all files necessary to upload to arXiv.