Skip to content

Instantly share code, notes, and snippets.

@steven-murray
Created March 17, 2022 22:05
Show Gist options
  • Save steven-murray/d10568b2f1462123e34bfbecacf2a662 to your computer and use it in GitHub Desktop.
Save steven-murray/d10568b2f1462123e34bfbecacf2a662 to your computer and use it in GitHub Desktop.
Manage your ADS publications!
"""
Super simple script for managing all your ADS publications.
Installation and Setup
----------------------
To set up for calling the script, you must install ads::
$ pip install git+https://github.com/steven-murray/ads
and also the following deps::
$ pip install questionary rich
You also must create an ADS API key (go to your settings page on ADS)
and save it in `~/.ads/dev_key`.
There is no installation of this script, you just run it from the command line::
$ python manage_my_pubs.py
Use `--help` to get options.
Settings
--------
The settings are towards the top of the file -- you should modify them for yourself.
Maybe in the future it can take in a YAML config. Most of them should be self-explanatory.
The `my_library` and `my_false_lib` settings are the references to 2 ADS Libraries.
The first should be your canonical library holding all of your works (this is what
this script updates!). The second is a library in which you can put papers that are
NOT yours, but which turn up under your search (i.e. there's another researcher with
the same name who has collaborated with someone from the same institution, etc.).
How it Works
------------
The script defines an ADS search query that should be designed to find *all* your papers.
For some people, this search query will also turn up some papers that are *not* yours.
The script will go through all the papers, and if any of them aren't in your library, it
will ask you whether it should put them there (or in your false positive list, so they
don't reappear every time). It will also do a check to see if all your papers have been
claimed in ORCID (it will also do a check to ensure all your ORCID papers are in your library,
regardless of whether they turned up in your search). Furthermore, it will check all the papers
in your library + ORCID, and if any do *not* come up in your search term, it will let you know,
so you can generalize your search term a bit.
In the end, if you run this script reasonably regularly, you should be able to point people
towards your "official" library, knowing that it is always up-to-date.
Finally, the script will print out some stats about your body of work!
"""
import sys
import ads
import argparse
from datetime import datetime
import questionary as qs
from ads.libraries import Library
from rich.console import Console
from ads.metrics import MetricsQuery
from rich.rule import Rule
from urllib.parse import quote
from rich.table import Table
import json
from pathlib import Path
# ==============================================
# USER SETTINGS
# ==============================================
# Edit the values below so that they describe you and your ADS libraries.

# Author name inserted into the `author:"..."` term of the search query.
name = "Murray, Steven G."

# First publication year covered by the search (`year:<year_started>-`).
year_started = 2013

# ORCID iD used for the `orcid:` search.
orcid = "0000-0003-3059-3823"

# ADS collection the search is restricted to.
collection = "astronomy"

# ADS library identifiers: the canonical library that this script updates,
# and the library of false positives that is excluded from the search.
my_library = "qfT0ZuGSRCWBI5sG0rl5hw"
my_false_lib = "7-Ft-lBdTCSoe56OQkETSQ"

# Affiliation strings that are OR-ed together inside the `aff:(...)` term.
affiliations = [
    "School of Earth and Space Exploration",
    "U West Austr",
    "UWA",
    "University of Western Australia",
    "Curtin",
    "ASU",
    "Arizona State University",
    # NOTE(review): presumably matches records without a listed affiliation
    # -- confirm against the ADS search syntax.
    "-",
]
# ---------------------------------------------
# Shared rich console used for all terminal output.
console = Console()

# Timestamp of this run; it is saved with the stats and compared against the
# timestamp of the previous run.
now = datetime.now()

# Command-line interface. Arguments are parsed right here because the whole
# file is a flat script.
parser = argparse.ArgumentParser(
    description=(
        'Keep your ADS library in sync with your publications and print '
        'statistics about them.'
    )
)
parser.add_argument(
    '--no-search-check',
    action='store_true',
    help="don't perform the check on the search",
)
args = parser.parse_args()

# Wrap each affiliation in double quotes before it is joined into the
# `aff:(...)` term of the search query.
affiliations = ['"' + aff + '"' for aff in affiliations]
def print_paper(p, fmt=True):
    """Return a one-line description of a paper: first author, title and year.

    Parameters
    ----------
    p
        Paper record exposing ``author`` and ``title`` (sequences, of which
        only the first element is used) and ``year``.
    fmt : bool, optional
        If True (default), wrap the author part in ``[bold]...[/bold]``
        markup. Pass False to get plain text.

    Returns
    -------
    str
        ``"<first author>: '<first title>' (<year>)"``, optionally with the
        bold markup around the author.

    Notes
    -----
    A missing or empty ``author`` / ``title`` is rendered with a placeholder
    instead of raising on the ``[0]`` lookup.
    """
    # Read each attribute once, then guard the [0] lookup against None / [].
    authors = p.author
    titles = p.title
    first_author = authors[0] if authors else "Unknown author"
    title = titles[0] if titles else "Untitled"

    author_part = f"{first_author}:"
    if fmt:
        author_part = f"[bold]{author_part}[/bold]"
    return f"{author_part} '{title}' ({p.year})"
# Turn the library identifiers from the settings into Library objects.
my_library, my_false_lib = Library(my_library), Library(my_false_lib)

# Fields requested from ADS for every paper.
fl = ['author', 'title', 'bibcode', 'year', 'affiliations']


def _fetch_papers(query):
    """Run an ADS search for ``query`` and return the results as a set."""
    return set(ads.SearchQuery(q=query, max_pages=100, fl=fl))


console.print(Rule("Retrieving Papers"))

# 1. Everything currently stored in the canonical library.
console.print("Retrieving library papers...", end=' ')
lib_papers = _fetch_papers(f'docs(library/{my_library.id})')
console.print(f"found {len(lib_papers)} papers.")

# 2. Everything matched by the dynamic search. With --no-search-check the
#    papers that are already in the library are excluded from the query.
console.print("Retrieving dynamic search papers...", end=' ')
qq = f"NOT docs(library/{my_library.id})" if args.no_search_check else ''
q = f'author:"{name}" collection:{collection} aff:({" OR ".join(affiliations)}) year:{year_started}- NOT docs(library/{my_false_lib.id}) {qq}'
search_papers = _fetch_papers(q)
console.print(f"found {len(search_papers)} papers.")
console.print(f"Search Query: {q}")

# 3. Everything associated with the ORCID iD.
console.print("Retrieving orcid papers...", end=' ')
orcid_papers = _fetch_papers(f'orcid:{orcid}')
console.print(f"found {len(orcid_papers)} papers.")
# Put all new papers in my library.
# "New" means: found by the search or by ORCID, but not yet in the library.
new = search_papers.union(orcid_papers).difference(lib_papers)
if new:
    console.print(f"Found {len(new)} new papers, please select whether each one should be in your library:")
    keep = []
    no_keep = []
    for p in new:
        # questionary does not render rich markup, so ask with the plain-text
        # description instead of the "[bold]...[/bold]" one.
        res = qs.rawselect(
            print_paper(p, fmt=False),
            choices=['yes', 'no', 'not sure', 'cancel'],
        ).ask()
        if res == 'yes':
            keep.append(p)
        elif res == 'no':
            no_keep.append(p)
        elif res == 'cancel' or res is None:
            # ask() returns None when the prompt is interrupted (Ctrl-C);
            # treat that like an explicit cancel. Nothing chosen so far is
            # saved, exactly as for 'cancel'.
            sys.exit(0)
        # 'not sure': leave the paper alone so it is asked about again next run.

    my_library.add_documents(keep)
    lib_papers = lib_papers.union(set(keep))
    my_false_lib.add_documents(no_keep)
else:
    console.print("[bold]Found no new papers!")
console.print()
# Check that all ORCID papers and library papers are found via dynamic search.
# With --no-search-check the search query explicitly excludes everything that
# is already in the library, so the comparison would report every library
# paper as missing; the check is skipped in that case.
if args.no_search_check:
    not_found = set()
else:
    not_found = orcid_papers.union(lib_papers).difference(search_papers)

if not_found:
    console.print(Rule("Search Check"))
    console.print("The following papers are associated with your ORCID or in your library, but not found in your search. Update your search terms!")
    for p in not_found:
        console.print(f" {print_paper(p)}")
# Report library papers that have not been claimed in ORCID, together with an
# ADS search link that lists exactly those papers.
not_orcid = lib_papers - orcid_papers
if not_orcid:
    console.print(Rule())

    # Build a `bibcode:(A OR B OR ...)` query and percent-encode it for the URL.
    bibcode_terms = ' OR '.join(p.bibcode for p in not_orcid)
    url = "https://ui.adsabs.harvard.edu/search/q=" + quote(f"bibcode:({bibcode_terms})")

    console.print("The following papers are in your library, but not associated with ORCID. Go to adsabs to claim them:")
    console.print(f" > '{url}'")
    console.print()
    for p in not_orcid:
        line = print_paper(p)
        console.print(f" {line}")
# Load the metrics saved by the previous run, if there was one.
cfile = Path("~/.ads-stats.json").expanduser()
if cfile.exists():
    # The handle gets its own name so that it does not rebind the module-level
    # `fl` field list.
    with open(cfile, 'r') as stats_file:
        old_m = json.load(stats_file)
else:
    old_m = {}

# Print out stats!
# Query the current metrics for every paper in the library.
m = MetricsQuery([p.bibcode for p in lib_papers]).execute()
console.print()
console.print(Rule("Stats"))
if old_m:
    # Same format as the one used when the timestamp is written out.
    old_time = datetime.strptime(old_m['time'], '%Y-%m-%d %H:%M')
    console.print(f"Last time stats were taken: {old_m['time']} ({(now - old_time).days} days ago)")
else:
    console.print("Stats never before taken!")
def get_entry(t, k):
    """Format metric ``k`` of section ``t`` with its change since the last run.

    Reads the module-level ``m`` (current metrics) and ``old_m`` (metrics
    loaded from the previous run; may be empty).

    Parameters
    ----------
    t : str
        Key of the metrics section, looked up as ``m[t]``.
    k : str
        Key of the metric inside that section.

    Returns
    -------
    str
        The current value with one decimal. If a previous value exists and
        differs, the difference is appended in red (decrease) or green
        (increase) markup.

    Raises
    ------
    KeyError
        If ``m`` has no entry for ``t`` / ``k``.
    """
    full = m[t][k]
    old = old_m.get(t, {}).get(k)

    # Compare against None rather than truthiness: a previous value of 0 is a
    # real value and the change relative to it should still be shown.
    if old is None or full == old:
        return f"{full:.1f}"
    if full < old:
        return f"{full:.1f} [red](-{old-full:.1f})[/red]"
    return f"{full:.1f} [green]({full-old:.1f})[/green]"
# One table per metrics section; every row shows the value over all papers
# next to the value from the matching "<section> refereed" section.
for t in ['basic stats', 'citation stats', 'indicators']:
    table = Table(title=t)
    table.add_column("Metric", justify='right')
    table.add_column("Total", justify="left", style="cyan", no_wrap=True)
    table.add_column("Refereed", justify="left", style="cyan", no_wrap=True)
    for k in m[t]:
        # List-valued metrics cannot be formatted as a single number; skip them.
        if not isinstance(m[t][k], list):
            entry = get_entry(t, k)
            ref_entry = get_entry(f'{t} refereed', k)
            table.add_row(k, entry, ref_entry)
    console.print(table)
    console.print()

# Record when these stats were taken and save them for comparison next run.
m['time'] = now.strftime('%Y-%m-%d %H:%M')
# The handle gets its own name so that it does not rebind the module-level
# `fl` field list.
with open(cfile, 'w') as stats_file:
    json.dump(m, stats_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment