Skip to content

Instantly share code, notes, and snippets.

@mei-li
Created December 16, 2020 16:59
Show Gist options
  • Save mei-li/67570f00d1345eb61524a7eaece7d477 to your computer and use it in GitHub Desktop.
Save mei-li/67570f00d1345eb61524a7eaece7d477 to your computer and use it in GitHub Desktop.

Query to get all required data for one pulication

select
  p.id, doi, pmid, pmcid, p.title, p.abstract.preferred, acknowledgements.preferred, journal.title, book_series_title.preferred,
  proceedings_title , mesh_terms, date, year, volume, issue, pages, type,
  au.first_name, au.last_name, au.raw_affiliations, au.corresponding, au.researcher_id, au.grid_ids,
  p.research_orgs,
  grid.name, grid.id, grid.address.city , grid.address.country, grid.address.state
  funder_orgs,
  fgrid.address.country, fgrid.name,
  supporting_grant_ids,
  grants.id, grants.grant_number,
  p.metrics.times_cited, p.metrics.recent_citations, p.metrics.relative_citation_ratio, p.metrics.field_citation_ratio,
  p.altmetrics.score ,
  for_level1.name,
  for_level2.name,
  rcdc.value,
  hrcs_hc.value,
  hrcs_rac.name,
  hra.value, # category_hra
  bra.value, # category_bra
  icrp_ct.value, # category_icrp_ct
  icrp_cso.name, # category_icrp_cso
  uoa.name, # category_uoa
  sdg.name # category_sdg
from `dimensions-ai.data_analytics.publications` p
LEFT JOIN
  UNNEST(p.category_sdg.full )  as sdg
LEFT JOIN
  UNNEST(p.category_uoa.full )  as uoa
LEFT JOIN
  UNNEST(p.category_icrp_cso.full )  as icrp_cso
LEFT JOIN
  UNNEST(p.category_icrp_ct.full )  as icrp_ct
LEFT JOIN
  UNNEST(p.category_bra.full )  as bra
LEFT JOIN
  UNNEST(p.category_hra.full )  as hra
LEFT JOIN
  UNNEST(p.category_hrcs_rac.full ) as hrcs_rac
LEFT JOIN
  UNNEST(p.category_hrcs_hc.full ) as hrcs_hc
LEFT JOIN
  UNNEST(p.category_rcdc.full ) as rcdc
LEFT JOIN
  UNNEST(p.category_for.first_level.full) as for_level1
LEFT JOIN
  UNNEST(p.category_for.second_level.full) as for_level2
LEFT JOIN
  UNNEST(authors) as au
LEFT JOIN
   UNNEST(research_orgs) AS research_orgs_grids
LEFT JOIN
   UNNEST(funder_orgs) AS funder_orgs_grids
LEFT JOIN
   `dimensions-ai.data_analytics.grid` grid
ON
   grid.id=research_orgs_grids
LEFT JOIN
   `dimensions-ai.data_analytics.grid` fgrid
   ON
   fgrid.id=funder_orgs_grids
LEFT JOIN
   UNNEST(supporting_grant_ids) AS grant_ids
LEFT JOIN
   `dimensions-ai.data_analytics.grants` grants
ON
   grants.id=grant_ids
where p.id = 'pub.1132070778'

Query to try to get all in one line

  select
p.id AS id,
ANY_VALUE(doi) AS doi,
ANY_VALUE(pmid) AS pmid,
ANY_VALUE(pmcid) AS pmcid,
ANY_VALUE(p.title) AS title,
ANY_VALUE(p.abstract.preferred) AS abstract,
ANY_VALUE(acknowledgements.preferred) AS acknowledgements,
STRING_AGG(journal.title) AS journal_title,
STRING_AGG(book_series_title.preferred) AS book_series_title,
ANY_VALUE(proceedings_title) AS proceedings_title,
ANY_VALUE(mesh_terms) AS mesh_terms,
ANY_VALUE(date) AS date,
ANY_VALUE(year) AS year,
ANY_VALUE(volume) AS volume,
ANY_VALUE(issue) AS issue,
ANY_VALUE(pages) AS pages,
ANY_VALUE(type) AS type,

STRING_AGG(au.first_name ORDER BY au.researcher_id ASC) AS author_first_name,
STRING_AGG(au.last_name ORDER BY au.researcher_id ASC) AS author_last_name,
# TODO STRING_AGG(STRING_AGG(au.raw_affiliations) ORDER BY au.researcher_id ASC) AS author_affiliations,
# TODO STRING_AGG(au.corresponding ORDER BY au.researcher_id ASC) AS author_corresponding,
STRING_AGG(au.researcher_id ORDER BY au.researcher_id ASC) AS author_id,
# TODO STRING_AGG(au.grid_ids ORDER BY au.researcher_id ASC) AS author_grid_ids,

ANY_VALUE(p.research_orgs) AS research_orgs,

STRING_AGG(grid.name ORDER BY grid.id) AS research_orgs_name,
STRING_AGG(grid.id ORDER BY grid.id) AS research_orgs_grid,
STRING_AGG(grid.address.city ORDER BY grid.id) AS research_orgs_city,
STRING_AGG(grid.address.country ORDER BY grid.id) AS research_orgs_country,
STRING_AGG(grid.address.state ORDER BY grid.id) AS research_orgs_state,

ANY_VALUE(funder_orgs) AS funder_orgs,

STRING_AGG(fgrid.address.country ORDER BY fgrid.id) AS funder_country,
STRING_AGG(fgrid.name ORDER BY fgrid.id) AS funder_id,

ANY_VALUE(supporting_grant_ids) AS supporting_grant_ids,

STRING_AGG(grants.id ORDER BY grants.id) AS supporting_grant_id,
STRING_AGG(grants.grant_number ORDER BY grants.id) AS supporting_grant_number,

ANY_VALUE(p.metrics.times_cited) AS metrics_times_cited,
ANY_VALUE(p.metrics.recent_citations) AS metrics_recent_citations,
ANY_VALUE(p.metrics.relative_citation_ratio) AS metrics_relative_citation_ratio,
ANY_VALUE(p.metrics.field_citation_ratio) AS metrics_field_citation_ratio,
ANY_VALUE(p.altmetrics.score) AS metrics_altmetric_score,


STRING_AGG(for_level1.name) AS category_for_level1,
STRING_AGG(for_level2.name) AS category_for_level2,
STRING_AGG(rcdc.value) AS category_rcdc,
STRING_AGG(hrcs_hc.value) AS category_hrcs_hc,
STRING_AGG(hrcs_rac.name) AS category_rac,
STRING_AGG(hra.value) AS  category_hra, # category_hra
STRING_AGG(bra.value) AS category_bra, # category_bra
STRING_AGG(icrp_ct.value) AS category_icrp_ct, # category_icrp_ct
STRING_AGG(icrp_cso.name) AS category_icrp_cso, # category_icrp_cso
STRING_AGG(uoa.name) AS category_uoa, # category_uoa
STRING_AGG(sdg.name) AS category_sdg # category_sdg
from `dimensions-ai.data_analytics.publications` p
LEFT JOIN
  UNNEST(p.category_sdg.full )  as sdg
LEFT JOIN
  UNNEST(p.category_uoa.full )  as uoa
LEFT JOIN
  UNNEST(p.category_icrp_cso.full )  as icrp_cso
LEFT JOIN
  UNNEST(p.category_icrp_ct.full )  as icrp_ct
LEFT JOIN
  UNNEST(p.category_bra.full )  as bra
LEFT JOIN
  UNNEST(p.category_hra.full )  as hra
LEFT JOIN
  UNNEST(p.category_hrcs_rac.full ) as hrcs_rac
LEFT JOIN
  UNNEST(p.category_hrcs_hc.full ) as hrcs_hc
LEFT JOIN
  UNNEST(p.category_rcdc.full ) as rcdc
LEFT JOIN
  UNNEST(p.category_for.first_level.full) as for_level1
LEFT JOIN
  UNNEST(p.category_for.second_level.full) as for_level2
LEFT JOIN
  UNNEST(authors) as au
LEFT JOIN
   UNNEST(research_orgs) AS research_orgs_grids
LEFT JOIN
   UNNEST(funder_orgs) AS funder_orgs_grids
LEFT JOIN
   `dimensions-ai.data_analytics.grid` grid
ON
   grid.id=research_orgs_grids
LEFT JOIN
   `dimensions-ai.data_analytics.grid` fgrid
   ON
   fgrid.id=funder_orgs_grids
LEFT JOIN
   UNNEST(supporting_grant_ids) AS grant_ids
LEFT JOIN
   `dimensions-ai.data_analytics.grants` grants
ON
   grants.id=grant_ids
where p.id = 'pub.1132070778'
GROUP BY p.id
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment