steven-murray/manage_my_pubs.py

## manage_my_pubs.py
"""
Super simple script for managing all your ADS publications.

Installation and Setup
----------------------
To setup for calling the script, you must install ads::

  $ pip install git+https://github.com/steven-murray/ads

and also the following deps::

  $ pip install questionary rich

You also must create an ADS API key (go to your settings page on ADS)
and save it in `~/.ads/dev_key`.

There is no installation of this script, you just run it from the command line::

  $ python manage_my_pubs.py

Use `--help` to get options.

Settings
--------
The settings are towards the top of the file -- you should modify them for yourself.
Maybe in the future it can take in a YAML config. Most of them should be self-explanatory.

The `my_library` and `my_false_lib` settings are the references to 2 ADS Libraries.
The first should be your canonical library holding all of your works (this is what
this script updates!). The second is a library in which you can put papers that are
NOT yours, but which turn up under your search (i.e. there's another researcher with
the same name who has collaborated with someone from the same institution, etc.).

How it Works
------------
The script defines an ADS search query that should be designed to find *all* your papers.
For some people, this search query will also turn up some papers that are *not* yours.
The script will go through all the papers, and if any of them aren't in your library, it
will ask you whether it should put them there (or in your false positive list, so they
don't reappear every time). It will also do a check to see if all your papers have been
claimed in ORCID (it will also do a check to ensure all your ORCID papers are in your library,
regardless of whether they turned up in your search). Furthermore, it will check all the papers
in your library + ORCID, and if any do *not* come up in your search term, it will let you know,
so you can generalize your search term a bit.

In the end, if you run this script reasonably regularly, you should be able to point people
towards your "official" library, knowing that it is always up-to-date.

Finally, the script will print out some stats about your body of work!
"""
import sys
import ads
import argparse
from datetime import datetime
import questionary as qs
from ads.libraries import Library
from rich.console import Console
from ads.metrics import MetricsQuery
from rich.rule import Rule
from urllib.parse import quote
from rich.table import Table
import json
from pathlib import Path

# ==============================================
# USER SETTINGS
# ==============================================
# Author name as it is inserted into the `author:"..."` term of the search query.
name = "Murray, Steven G."
# First publication year covered by the `year:` term of the search query.
year_started = 2013
# ORCID iD used for the `orcid:` query.
orcid = "0000-0003-3059-3823"
# Value of the `collection:` term of the search query.
collection = 'astronomy'

# ADS library identifiers: the canonical library, then the false-positive library.
my_library = "qfT0ZuGSRCWBI5sG0rl5hw"
my_false_lib = "7-Ft-lBdTCSoe56OQkETSQ"

# Affiliation strings that get OR-ed together inside the `aff:(...)` search term.
affiliations = [
    "School of Earth and Space Exploration",
    "U West Austr",
    "UWA",
    "University of Western Australia",
    "Curtin",
    "ASU",
    "Arizona State University",
    # NOTE(review): presumably matches records that list no affiliation -- confirm.
    "-",
]
# ---------------------------------------------

# Shared rich console used for every message and table the script prints.
console = Console()

# Timestamp of this run; it is compared with, and saved alongside, the stored stats.
now = datetime.now()

# The previous description ('Print out all LoCo publicatoins.') was misspelled and
# described a different script, so `--help` now says what this script does.
parser = argparse.ArgumentParser(
    description='Keep an ADS library of your publications up to date and print citation stats.'
)
parser.add_argument(
    '--no-search-check',
    action='store_true',
    help="don't perform the check on the search",
)

args = parser.parse_args()

# Wrap each affiliation in double quotes so it is embedded in the query as a quoted term.
affiliations = [f'"{aff}"' for aff in affiliations]

def print_paper(p, fmt=True):
    """Return a one-line summary of paper ``p``: first author, title and year.

    Parameters
    ----------
    p
        Object exposing ``author`` and ``title`` (indexable; element 0 is used)
        and ``year``.
    fmt
        When true, the author part is wrapped in ``[bold]...[/bold]`` markup;
        when false, the summary is plain text.

    Returns
    -------
    str
        ``"<first author>: '<title>' (<year>)"``, with the optional bold markup.
    """
    lead_author = p.author[0]
    heading = f"[bold]{lead_author}:[/bold]" if fmt else f"{lead_author}:"
    return f"{heading} '{p.title[0]}' ({p.year})"

# From here on the two settings hold Library objects instead of plain identifier strings.
my_library = Library(my_library)
my_false_lib = Library(my_false_lib)

# Fields requested for every paper returned by the searches below.
fl = ['author', 'title', 'bibcode', 'year', 'affiliations']


def _fetch_papers(query):
    """Run one ADS search for ``query`` and return the resulting papers as a set."""
    return set(ads.SearchQuery(q=query, max_pages=100, fl=fl))


console.print(Rule("Retrieving Papers"))
console.print("Retrieving library papers...", end=' ')
lib_papers = _fetch_papers(f'docs(library/{my_library.id})')
console.print(f"found {len(lib_papers)} papers.")

console.print("Retrieving dynamic search papers...", end=' ')
# With --no-search-check the search is narrowed to papers not already in the library.
qq = f"NOT docs(library/{my_library.id})" if args.no_search_check else ''

# The terms are joined with single spaces, so an empty `qq` leaves a trailing space.
aff_clause = " OR ".join(affiliations)
q = " ".join([
    f'author:"{name}"',
    f'collection:{collection}',
    f'aff:({aff_clause})',
    f'year:{year_started}-',
    f'NOT docs(library/{my_false_lib.id})',
    qq,
])

search_papers = _fetch_papers(q)
console.print(f"found {len(search_papers)} papers.")
console.print(f"Search Query: {q}")

console.print("Retrieving orcid papers...", end=' ')
orcid_papers = _fetch_papers(f'orcid:{orcid}')
console.print(f"found {len(orcid_papers)} papers.")

# Put all new papers in my library.
# "New" means: found by the search or by ORCID, but not yet in the library.
new = search_papers.union(orcid_papers).difference(lib_papers)

if new:
    console.print(f"Found {len(new)} new papers, please select whether each one should be in your library:")
    keep = []
    no_keep = []
    for p in new:
        # questionary prints its prompt verbatim, so use the plain-text summary;
        # the rich "[bold]" markup would otherwise show up literally.
        res = qs.rawselect(
            print_paper(p, fmt=False),
            choices=['yes', 'no', 'not sure', 'cancel'],
        ).ask()
        if res == 'yes':
            keep.append(p)
        elif res == 'no':
            no_keep.append(p)
        elif res == 'cancel' or res is None:
            # `ask()` returns None when the prompt is aborted (e.g. Ctrl-C);
            # treat that like 'cancel' instead of silently moving on.
            sys.exit(0)
        # 'not sure' files the paper nowhere, so it is offered again on the next run.

    # Only contact the libraries when there is something to add.
    if keep:
        my_library.add_documents(keep)
        lib_papers = lib_papers.union(keep)

    if no_keep:
        my_false_lib.add_documents(no_keep)

else:
    console.print("[bold]Found no new papers!")

console.print()

# Check that all ORCID papers and library papers are found via dynamic search.
# With --no-search-check the search query deliberately excludes everything already
# in the library, so the comparison would flag those papers; skip the report then.
not_found = orcid_papers.union(lib_papers).difference(search_papers)
if not_found and not args.no_search_check:
    console.print(Rule("Search Check"))
    console.print("The following papers are associated with your ORCID or in your library, but not found in your search. Update your search terms!")
    for p in not_found:
        console.print(f"  {print_paper(p)}")

# Check that all of my work is in ORCID:
# list every library paper that the ORCID query did not return.
not_orcid = lib_papers.difference(orcid_papers)
if not_orcid:
    console.print(Rule())
    # One ADS search URL that selects all unclaimed papers by bibcode.
    bibcode_terms = ' OR '.join(paper.bibcode for paper in not_orcid)
    url = "https://ui.adsabs.harvard.edu/search/q=" + quote(f"bibcode:({bibcode_terms})")
    console.print("The following papers are in your library, but not associated with ORCID. Go to adsabs to claim them:")
    console.print(f" > '{url}'")
    console.print()
    for paper in not_orcid:
        summary = print_paper(paper)
        console.print(f"  {summary}")

# Metrics saved by the previous run live in a JSON file in the home directory.
cfile = Path("~/.ads-stats.json").expanduser()

if cfile.exists():
    # A dedicated handle name avoids rebinding `fl`, the list of ADS fields used by the searches.
    with open(cfile, 'r') as stats_file:
        old_m = json.load(stats_file)
else:
    old_m = {}

# Print out stats!
m = MetricsQuery([p.bibcode for p in lib_papers]).execute()

console.print()
console.print(Rule("Stats"))
if old_m:
    old_time = datetime.strptime(old_m['time'], '%Y-%m-%d %H:%M')
    # This has to be an f-string: the old plain string printed the literal text
    # "{(now - old_time).days}" (plus a stray ")") instead of the day count.
    console.print(f"Last time stats were taken: {old_m['time']} ({(now - old_time).days} days ago)")
else:
    console.print("Stats never before taken!")


def get_entry(t, k):
    """Format metric ``k`` of section ``t``, marking the change since the last run.

    The current value is read from the module-level ``m`` and the previous one
    from the module-level ``old_m``.

    Parameters
    ----------
    t
        Section key, looked up in both ``m`` and ``old_m``.
    k
        Metric key inside that section.

    Returns
    -------
    str
        The current value with one decimal. It is followed by a red ``(-delta)``
        when the value dropped, or a green ``(delta)`` when it grew. No
        annotation is added when the value is unchanged or no previous value
        is stored.

    Raises
    ------
    KeyError
        If ``m`` has no entry for ``t``/``k``.
    """
    full = m[t][k]
    old = old_m.get(t, {}).get(k)

    # Compare against None rather than truthiness: a stored value of 0 is a real
    # previous value, and the growth from it should still be reported.
    if old is None or full == old:
        return f"{full:.1f}"
    if full < old:
        return f"{full:.1f} [red](-{old-full:.1f})[/red]"
    return f"{full:.1f} [green]({full-old:.1f})[/green]"

# One table per metrics section, with the all-papers value next to the refereed-only value.
for section in ['basic stats', 'citation stats', 'indicators']:
    table = Table(title=section)

    table.add_column("Metric", justify='right')
    for heading in ("Total", "Refereed"):
        table.add_column(heading, justify="left", style="cyan", no_wrap=True)

    for metric, value in m[section].items():
        # List-valued metrics do not fit in a single table cell, so they are left out.
        if isinstance(value, list):
            continue
        total_cell = get_entry(section, metric)
        refereed_cell = get_entry(f'{section} refereed', metric)
        table.add_row(metric, total_cell, refereed_cell)

    console.print(table)
    console.print()


# Stamp the metrics with the time of this run, in the format the stats header parses.
m['time'] = now.strftime('%Y-%m-%d %H:%M')


# Save the metrics so the next run can show what changed.
with open(cfile, 'w') as stats_out:
    json.dump(m, stats_out)
	"""
	Super simple script for managing all your ADS publications.

	Installation and Setup
	----------------------
	To setup for calling the script, you must install ads::

	$ pip install git+https://github.com/steven-murray/ads

	and also the following deps::

	$ pip install questionary rich

	You also must create an ADS API key (go to your settings page on ADS)
	and save it in `~/.ads/dev_key`.

	There is no installation of this script, you just run it from the command line::

	$ python manage_my_pubs.py

	Use `--help` to get options.

	Settings
	--------
	The settings are towards the top of the file -- you should modify them for yourself.
	Maybe in the future it can take in a YAML config. Most of them should be self-explanatory.

	The `my_library` and `my_false_lib` settings are the references to 2 ADS Libraries.
	The first should be your canonical library holding all of your works (this is what
	this script updates!). The second is a library in which you can put papers that are
	NOT yours, but which turn up under your search (i.e. there's another researcher with
	the same name who has collaborated with someone from the same institution, etc.).

	How it Works
	------------
	The script defines an ADS search query that should be designed to find all your papers.
	For some people, this search query will also turn up some papers that are not yours.
	The script will go through all the papers, and if any of them aren't in your library, it
	will ask you whether it should put them there (or in your false positive list, so they
	don't reappear every time). It will also do a check to see if all your papers have been
	claimed in ORCID (it will also do a check to ensure all your ORCID papers are in your library,
	regardless of whether they turned up in your search). Furthermore, it will check all the papers
	in your library + ORCID, and if any do not come up in your search term, it will let you know,
	so you can generalize your search term a bit.

	In the end, if you run this script reasonably regularly, you should be able to point people
	towards your "official" library, knowing that it is always up-to-date.

	Finally, the script will print out some stats about your body of work!
	"""
	import sys
	import ads
	import argparse
	from datetime import datetime
	import questionary as qs
	from ads.libraries import Library
	from rich.console import Console
	from ads.metrics import MetricsQuery
	from rich.rule import Rule
	from urllib.parse import quote
	from rich.table import Table
	import json
	from pathlib import Path

	# ==============================================
	# USER SETTINGS
	# ==============================================
	# Author name as it is inserted into the `author:"..."` term of the search query.
	name = "Murray, Steven G."
	# First publication year covered by the `year:` term of the search query.
	year_started = 2013
	# ORCID iD used for the `orcid:` query.
	orcid = "0000-0003-3059-3823"
	# Value of the `collection:` term of the search query.
	collection = 'astronomy'

	# ADS library identifiers: the canonical library, then the false-positive library.
	my_library = "qfT0ZuGSRCWBI5sG0rl5hw"
	my_false_lib = "7-Ft-lBdTCSoe56OQkETSQ"

	# Affiliation strings that get OR-ed together inside the `aff:(...)` search term.
	affiliations = [
	    "School of Earth and Space Exploration",
	    "U West Austr",
	    "UWA",
	    "University of Western Australia",
	    "Curtin",
	    "ASU",
	    "Arizona State University",
	    # NOTE(review): presumably matches records that list no affiliation -- confirm.
	    "-",
	]
	# ---------------------------------------------

	# Shared rich console used for every message and table the script prints.
	console = Console()

	# Timestamp of this run; it is compared with, and saved alongside, the stored stats.
	now = datetime.now()

	# The previous description ('Print out all LoCo publicatoins.') was misspelled and
	# described a different script, so `--help` now says what this script does.
	parser = argparse.ArgumentParser(
	    description='Keep an ADS library of your publications up to date and print citation stats.'
	)
	parser.add_argument(
	    '--no-search-check',
	    action='store_true',
	    help="don't perform the check on the search",
	)

	args = parser.parse_args()

	# Wrap each affiliation in double quotes so it is embedded in the query as a quoted term.
	affiliations = [f'"{aff}"' for aff in affiliations]

	def print_paper(p, fmt=True):
	    """Return a one-line summary of paper ``p``: first author, title and year.

	    The function body had lost its indentation; it is restored here.

	    Parameters
	    ----------
	    p
	        Object exposing ``author`` and ``title`` (indexable; element 0 is used)
	        and ``year``.
	    fmt
	        When true, the author part is wrapped in ``[bold]...[/bold]`` markup;
	        when false, the summary is plain text.

	    Returns
	    -------
	    str
	        ``"<first author>: '<title>' (<year>)"``, with the optional bold markup.
	    """
	    lead_author = p.author[0]
	    heading = f"[bold]{lead_author}:[/bold]" if fmt else f"{lead_author}:"
	    return f"{heading} '{p.title[0]}' ({p.year})"

	# From here on the two settings hold Library objects instead of plain identifier strings.
	my_library = Library(my_library)
	my_false_lib = Library(my_false_lib)

	# Fields requested for every paper returned by the searches below.
	fl = ['author', 'title', 'bibcode', 'year', 'affiliations']

	console.print(Rule("Retrieving Papers"))
	console.print("Retrieving library papers...", end=' ')
	lib_papers = set(ads.SearchQuery(
	    q=f'docs(library/{my_library.id})',
	    max_pages=100,
	    fl=fl,
	))
	console.print(f"found {len(lib_papers)} papers.")

	console.print("Retrieving dynamic search papers...", end=' ')
	# The if/else bodies had lost their indentation; they are restored here.
	# With --no-search-check the search is narrowed to papers not already in the library.
	if args.no_search_check:
	    qq = f"NOT docs(library/{my_library.id})"
	else:
	    qq = ''

	q = f'author:"{name}" collection:{collection} aff:({" OR ".join(affiliations)}) year:{year_started}- NOT docs(library/{my_false_lib.id}) {qq}'

	search_papers = set(ads.SearchQuery(
	    q=q,
	    max_pages=100,
	    fl=fl,
	))
	console.print(f"found {len(search_papers)} papers.")
	console.print(f"Search Query: {q}")

	console.print("Retrieving orcid papers...", end=' ')
	orcid_papers = set(ads.SearchQuery(
	    q=f'orcid:{orcid}',
	    max_pages=100,
	    fl=fl,
	))
	console.print(f"found {len(orcid_papers)} papers.")

	# Put all new papers in my library.
	# "New" means: found by the search or by ORCID, but not yet in the library.
	# (The nested indentation of this section had been lost; it is restored here.)
	new = search_papers.union(orcid_papers).difference(lib_papers)

	if new:
	    console.print(f"Found {len(new)} new papers, please select whether each one should be in your library:")
	    keep = []
	    no_keep = []
	    for p in new:
	        # questionary prints its prompt verbatim, so use the plain-text summary;
	        # the rich "[bold]" markup would otherwise show up literally.
	        res = qs.rawselect(
	            print_paper(p, fmt=False),
	            choices=['yes', 'no', 'not sure', 'cancel'],
	        ).ask()
	        if res == 'yes':
	            keep.append(p)
	        elif res == 'no':
	            no_keep.append(p)
	        elif res == 'cancel' or res is None:
	            # `ask()` returns None when the prompt is aborted (e.g. Ctrl-C);
	            # treat that like 'cancel' instead of silently moving on.
	            sys.exit(0)
	        # 'not sure' files the paper nowhere, so it is offered again on the next run.

	    # Only contact the libraries when there is something to add.
	    if keep:
	        my_library.add_documents(keep)
	        lib_papers = lib_papers.union(keep)

	    if no_keep:
	        my_false_lib.add_documents(no_keep)

	else:
	    console.print("[bold]Found no new papers!")

	console.print()

	# Check that all ORCID papers and library papers are found via dynamic search.
	# With --no-search-check the search query deliberately excludes everything already
	# in the library, so the comparison would flag those papers; skip the report then.
	# (The nested indentation of this section had been lost; it is restored here.)
	not_found = orcid_papers.union(lib_papers).difference(search_papers)
	if not_found and not args.no_search_check:
	    console.print(Rule("Search Check"))
	    console.print("The following papers are associated with your ORCID or in your library, but not found in your search. Update your search terms!")
	    for p in not_found:
	        console.print(f" {print_paper(p)}")

	# Check that all of my work is in ORCID:
	# list every library paper that the ORCID query did not return.
	# (The nested indentation of this section had been lost; it is restored here.)
	not_orcid = lib_papers.difference(orcid_papers)
	if not_orcid:
	    console.print(Rule())
	    # One ADS search URL that selects all unclaimed papers by bibcode.
	    url = "https://ui.adsabs.harvard.edu/search/q=" + quote(f"bibcode:({' OR '.join([p.bibcode for p in not_orcid])})")
	    console.print("The following papers are in your library, but not associated with ORCID. Go to adsabs to claim them:")
	    console.print(f" > '{url}'")
	    console.print()
	    for p in not_orcid:
	        console.print(f" {print_paper(p)}")

	# Metrics saved by the previous run live in a JSON file in the home directory.
	# (The nested indentation of this section had been lost; it is restored here.)
	cfile = Path("~/.ads-stats.json").expanduser()

	if cfile.exists():
	    # A dedicated handle name avoids rebinding `fl`, the list of ADS fields used by the searches.
	    with open(cfile, 'r') as stats_file:
	        old_m = json.load(stats_file)
	else:
	    old_m = {}

	# Print out stats!
	m = MetricsQuery([p.bibcode for p in lib_papers]).execute()

	console.print()
	console.print(Rule("Stats"))
	if old_m:
	    old_time = datetime.strptime(old_m['time'], '%Y-%m-%d %H:%M')
	    # This has to be an f-string: the old plain string printed the literal text
	    # "{(now - old_time).days}" (plus a stray ")") instead of the day count.
	    console.print(f"Last time stats were taken: {old_m['time']} ({(now - old_time).days} days ago)")
	else:
	    console.print("Stats never before taken!")


	def get_entry(t, k):
	    """Format metric ``k`` of section ``t``, marking the change since the last run.

	    The current value is read from the module-level ``m`` and the previous one
	    from the module-level ``old_m``. The function body had lost its
	    indentation; it is restored here.

	    Parameters
	    ----------
	    t
	        Section key, looked up in both ``m`` and ``old_m``.
	    k
	        Metric key inside that section.

	    Returns
	    -------
	    str
	        The current value with one decimal. It is followed by a red
	        ``(-delta)`` when the value dropped, or a green ``(delta)`` when it
	        grew. No annotation is added when the value is unchanged or no
	        previous value is stored.

	    Raises
	    ------
	    KeyError
	        If ``m`` has no entry for ``t``/``k``.
	    """
	    full = m[t][k]
	    old = old_m.get(t, {}).get(k)

	    # Compare against None rather than truthiness: a stored value of 0 is a real
	    # previous value, and the growth from it should still be reported.
	    if old is None or full == old:
	        return f"{full:.1f}"
	    if full < old:
	        return f"{full:.1f} [red](-{old-full:.1f})[/red]"
	    return f"{full:.1f} [green]({full-old:.1f})[/green]"

	# One table per metrics section, with the all-papers value next to the refereed-only value.
	# (The nested indentation of this section had been lost; it is restored here.)
	for t in ['basic stats', 'citation stats', 'indicators']:
	    table = Table(title=t)

	    table.add_column("Metric", justify='right')
	    table.add_column("Total", justify="left", style="cyan", no_wrap=True)
	    table.add_column("Refereed", justify="left", style="cyan", no_wrap=True)

	    for k in m[t]:
	        # List-valued metrics do not fit in a single table cell, so they are left out.
	        if not isinstance(m[t][k], list):
	            entry = get_entry(t, k)
	            ref_entry = get_entry(f'{t} refereed', k)
	            table.add_row(k, entry, ref_entry)

	    console.print(table)
	    console.print()


	# Stamp the metrics with the time of this run, in the format the stats header parses.
	m['time'] = now.strftime('%Y-%m-%d %H:%M')


	# Save the metrics so the next run can show what changed.
	with open(cfile, 'w') as stats_out:
	    json.dump(m, stats_out)