Created
March 17, 2022 22:05
-
-
Save steven-murray/d10568b2f1462123e34bfbecacf2a662 to your computer and use it in GitHub Desktop.
Manage your ADS publications!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Super simple script for managing all your ADS publications. | |
Installation and Setup | |
---------------------- | |
To setup for calling the script, you must install ads:: | |
    $ pip install git+https://github.com/steven-murray/ads
and also the following deps:: | |
    $ pip install questionary rich
You also must create an ADS API key (go to your settings page on ADS) | |
and save it in `~/.ads/dev_key`. | |
There is no installation of this script, you just run it from the command line:: | |
$ python manage_my_pubs.py | |
Use `--help` to get options. | |
Settings | |
-------- | |
The settings are towards the top of the file -- you should modify them for yourself. | |
Maybe in the future it can take in a YAML config. Most of them should be self-explanatory. | |
The `my_library` and `my_false_lib` settings are the references to 2 ADS Libraries.
The first should be your canonical library holding all of your works (this is what | |
this script updates!). The second is a library in which you can put papers that are | |
NOT yours, but which turn up under your search (i.e. there's another researcher with | |
the same name who has collaborated with someone from the same institution, etc.). | |
How it Works | |
------------ | |
The script defines an ADS search query that should be designed to find *all* your papers. | |
For some people, this search query will also turn up some papers that are *not* yours. | |
The script will go through all the papers, and if any of them aren't in your library, it | |
will ask you whether it should put them there (or in your false positive list, so they | |
don't reappear every time). It will also do a check to see if all your papers have been | |
claimed in ORCID (it will also do a check to ensure all your ORCID papers are in your library, | |
regardless of whether they turned up in your search). Furthermore, it will check all the papers | |
in your library + ORCID, and if any do *not* come up in your search term, it will let you know, | |
so you can generalize your search term a bit. | |
In the end, if you run this script reasonably regularly, you should be able to point people | |
towards your "official" library, knowing that it is always up-to-date. | |
Finally, the script will print out some stats about your body of work! | |
""" | |
import sys | |
import ads | |
import argparse | |
from datetime import datetime | |
import questionary as qs | |
from ads.libraries import Library | |
from rich.console import Console | |
from ads.metrics import MetricsQuery | |
from rich.rule import Rule | |
from urllib.parse import quote | |
from rich.table import Table | |
import json | |
from pathlib import Path | |
# ==============================================
# USER SETTINGS
# ==============================================
# Author name, inserted verbatim into the ``author:"..."`` clause of the search.
name = "Murray, Steven G."

# Lower bound of the open-ended ``year:<year_started>-`` clause of the search.
year_started = 2013

# ORCID iD used for the ``orcid:`` query.
orcid = "0000-0003-3059-3823"

# Value of the ``collection:`` clause of the search.
collection = "astronomy"

# ID of the ADS library that holds the canonical list of your papers.
my_library = "qfT0ZuGSRCWBI5sG0rl5hw"

# ID of the ADS library that holds false positives (papers that match the
# search but are not yours); its contents are excluded from the search.
my_false_lib = "7-Ft-lBdTCSoe56OQkETSQ"

# Affiliation strings; they are OR-ed together inside the ``aff:(...)`` clause.
# NOTE(review): "-" presumably matches records with no affiliation listed --
# confirm against the ADS search syntax.
affiliations = [
    "School of Earth and Space Exploration",
    "U West Austr",
    "UWA",
    "University of Western Australia",
    "Curtin",
    "ASU",
    "Arizona State University",
    "-",
]
# ---------------------------------------------
# One console for all rich output, and one timestamp shared by the whole run
# (used both for the "last taken" message and for the saved stats).
console = Console()
now = datetime.now()

# The description shown by ``--help`` now describes this script (the previous
# text was misspelled and referred to a different set of publications).
parser = argparse.ArgumentParser(description="Manage your ADS publications.")
parser.add_argument(
    "--no-search-check",
    action="store_true",
    help="don't perform the check on the search",
)
args = parser.parse_args()

# Wrap every affiliation in double quotes so each one reaches the ``aff:(...)``
# clause of the search query as a single quoted string.
affiliations = ['"' + aff + '"' for aff in affiliations]
def print_paper(p, fmt=True):
    """Return a one-line description of paper ``p``.

    The line has the form ``<first author>: '<title>' (<year>)``, built from
    ``p.author[0]``, ``p.title[0]`` and ``p.year``.

    Parameters
    ----------
    p
        Object exposing ``author`` and ``title`` (indexable) and ``year``.
    fmt
        If true (the default), wrap the author part in ``[bold]...[/bold]``
        markup; otherwise return plain text.
    """
    lead_author = f"{p.author[0]}:"
    if fmt:
        lead_author = f"[bold]{lead_author}[/bold]"
    return f"{lead_author} '{p.title[0]}' ({p.year})"
# Replace the library ID strings with Library objects; their ``.id`` is what
# gets interpolated into the queries below.
my_library = Library(my_library)
my_false_lib = Library(my_false_lib)

# Fields requested for every paper.
fl = ['author', 'title', 'bibcode', 'year', 'affiliations']


def _fetch_papers(query):
    """Run ``query`` through ``ads.SearchQuery`` and return the results as a set."""
    return set(ads.SearchQuery(q=query, max_pages=100, fl=fl))


console.print(Rule("Retrieving Papers"))

# 1) Everything currently in the canonical library.
console.print("Retrieving library papers...", end=' ')
lib_papers = _fetch_papers(f'docs(library/{my_library.id})')
console.print(f"found {len(lib_papers)} papers.")

# 2) Everything matched by the dynamic author/affiliation search, minus the
#    known false positives. With --no-search-check, papers already in the
#    library are excluded from the search as well.
console.print("Retrieving dynamic search papers...", end=' ')
qq = f"NOT docs(library/{my_library.id})" if args.no_search_check else ''
q = (
    f'author:"{name}" collection:{collection} '
    f'aff:({" OR ".join(affiliations)}) year:{year_started}- '
    f'NOT docs(library/{my_false_lib.id}) {qq}'
)
search_papers = _fetch_papers(q)
console.print(f"found {len(search_papers)} papers.")
console.print(f"Search Query: {q}")

# 3) Everything associated with the ORCID iD.
console.print("Retrieving orcid papers...", end=' ')
orcid_papers = _fetch_papers(f'orcid:{orcid}')
console.print(f"found {len(orcid_papers)} papers.")
# Put all new papers in my library.
# "New" means: found by the search or via ORCID, but not yet in the library.
new = search_papers.union(orcid_papers).difference(lib_papers)
if new:
    console.print(f"Found {len(new)} new papers, please select whether each one should be in your library:")
    keep = []
    no_keep = []
    for p in new:
        # The prompt is rendered by questionary, not rich, so rich markup such
        # as [bold] would be shown literally -- ask with the plain-text form.
        res = qs.rawselect(
            print_paper(p, fmt=False),
            choices=['yes', 'no', 'not sure', 'cancel'],
        ).ask()
        if res == 'yes':
            keep.append(p)
        elif res == 'no':
            no_keep.append(p)
        elif res is None or res == 'cancel':
            # ask() yields None when the prompt is interrupted (e.g. Ctrl-C);
            # treat that like an explicit cancel instead of silently moving
            # on to the next paper. Nothing chosen so far is saved.
            sys.exit(0)
        # 'not sure': the paper goes into neither library, so it will be
        # offered again on the next run.

    # Only touch a library when there is actually something to add to it.
    if keep:
        my_library.add_documents(keep)
        lib_papers = lib_papers.union(keep)
    if no_keep:
        my_false_lib.add_documents(no_keep)
else:
    console.print("[bold]Found no new papers!")
console.print()
# Check that all ORCID papers and library papers are found via dynamic search.
# With --no-search-check the search query explicitly excludes every paper that
# is already in the library, so comparing against it would flag the entire
# library as "not found"; in that case the check is skipped, as the flag's
# help text promises.
if args.no_search_check:
    not_found = set()
else:
    not_found = orcid_papers.union(lib_papers).difference(search_papers)

if not_found:
    console.print(Rule("Search Check"))
    console.print("The following papers are associated with your ORCID or in your library, but not found in your search. Update your search terms!")
    for p in not_found:
        console.print(f" {print_paper(p)}")
# Check that all of my work is in ORCID: anything in the library that the
# ORCID query did not return still needs to be claimed.
not_orcid = lib_papers - orcid_papers
if not_orcid:
    console.print(Rule())
    # Build one ADS search link that lists every unclaimed paper by bibcode.
    bibcode_query = "bibcode:(" + " OR ".join(p.bibcode for p in not_orcid) + ")"
    url = "https://ui.adsabs.harvard.edu/search/q=" + quote(bibcode_query)
    console.print("The following papers are in your library, but not associated with ORCID. Go to adsabs to claim them:")
    console.print(f" > '{url}'")
    console.print()
    for p in not_orcid:
        console.print(f" {print_paper(p)}")
# Metrics saved by the previous run live in a JSON file in the home directory;
# an empty dict stands for "no previous run".
cfile = Path("~/.ads-stats.json").expanduser()
old_m = json.loads(cfile.read_text()) if cfile.exists() else {}
# Print out stats!
# Metrics are requested for every paper now in the library, by bibcode.
m = MetricsQuery([p.bibcode for p in lib_papers]).execute()
console.print()
console.print(Rule("Stats"))
if old_m:
    # 'time' is written by this script in the same format when stats are saved.
    old_time = datetime.strptime(old_m['time'], '%Y-%m-%d %H:%M')
    # The elapsed-days part must be an f-string, otherwise the braces are
    # printed literally instead of the number of days.
    console.print("Last time stats were taken:", old_m["time"], f"({(now - old_time).days} days ago)")
else:
    console.print("Stats never before taken!")
def get_entry(t, k):
    """Format metric ``k`` of group ``t`` as a table cell, with its change since the last run.

    Reads the module-level ``m`` (current metrics) and ``old_m`` (metrics
    loaded from the previous run; may be empty).

    Parameters
    ----------
    t
        Key of the metric group in ``m`` / ``old_m``.
    k
        Key of the metric inside that group.

    Returns
    -------
    str
        The current value with one decimal place. If a previous value exists
        and differs, the difference follows in parentheses, wrapped in
        ``[red]`` markup for a decrease or ``[green]`` markup for an increase.

    Raises
    ------
    KeyError
        If ``m`` has no entry ``m[t][k]``.
    """
    full = m[t][k]
    old = old_m.get(t, {}).get(k)

    # Compare against None rather than truthiness: a previous value of 0 is a
    # real value, and a rise from 0 must still be reported as an increase.
    if old is None or full == old:
        return f"{full:.1f}"
    if full < old:
        return f"{full:.1f} [red](-{old - full:.1f})[/red]"
    return f"{full:.1f} [green]({full - old:.1f})[/green]"
# One table per metric group: a row per metric, with the value for all papers
# next to the value from the matching "<group> refereed" group.
for t in ('basic stats', 'citation stats', 'indicators'):
    table = Table(title=t)
    table.add_column("Metric", justify='right')
    for heading in ("Total", "Refereed"):
        table.add_column(heading, justify="left", style="cyan", no_wrap=True)

    for k, value in m[t].items():
        # List-valued metrics are not shown in the table.
        if isinstance(value, list):
            continue
        table.add_row(k, get_entry(t, k), get_entry(f'{t} refereed', k))

    console.print(table)
    console.print()
# Stamp the metrics with this run's time and save them, so the next run can
# show what changed since now.
m['time'] = now.strftime('%Y-%m-%d %H:%M')
cfile.write_text(json.dumps(m))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment