Skip to content

Instantly share code, notes, and snippets.

@Aariq
Last active January 31, 2022 15:50
Show Gist options
  • Save Aariq/a23958e168e347f1bacf9dfa777b911f to your computer and use it in GitHub Desktop.
Save Aariq/a23958e168e347f1bacf9dfa777b911f to your computer and use it in GitHub Desktop.
"hashes" from Elsevier PDF metadata
# My janky R code wrapping the command-line tool `exiftool`
library(stringr)
library(purrr)
library(glue)
#' Pull the DOI and "hash"-like tokens out of a PDF's XMP metadata
#'
#' Runs `exiftool -b -xmp` on `file` and scans each line of its output with
#' two regular expressions: one for a DOI, one for a self-closing tag whose
#' name is 40 or more characters drawn from `[0-9A-Za-z_.-]`.
#'
#' @param file Path to a single PDF file.
#' @return A list with two elements:
#'   * `doi`: the first DOI matched in the output, or `NULL` if no line
#'     contains one.
#'   * `hash`: character vector of the matched tag names (at most one per
#'     output line; possibly empty).
pull_hashes <- function(file) {
  # shQuote() the path instead of wrapping it in literal single quotes, so
  # that file names containing an apostrophe (or other shell metacharacters)
  # are passed to exiftool intact rather than breaking the command line.
  xml <- system(glue("exiftool -b -xmp {shQuote(file)}"), intern = TRUE)

  # str_extract() yields one value per line (NA where nothing matched); drop
  # the NAs and keep the first hit. pluck() returns NULL when none remain.
  doi <-
    str_extract(xml, '\\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])[[:graph:]])+)\\b') %>%
    discard(~ is.na(.x)) %>%
    pluck(1)

  # Column 2 of the str_match() matrix is the first capture group, i.e. the
  # tag name without the surrounding "<" and "/>".
  hashes <-
    str_match(xml, "<([0-9A-Za-z_.-]{40,})/>")[, 2] %>%
    discard(~ is.na(.x))

  list(doi = doi, hash = hashes)
}
# Collect every PDF under the Zotero storage directory (searched recursively,
# returned as full paths).
pdfs <- list.files(
  path = "/Users/scottericr/Zotero/storage",
  pattern = "\\.pdf$",
  full.names = TRUE,
  recursive = TRUE
)

# Run pull_hashes() on each PDF and row-bind the results, then drop any
# zero-length columns from the combined table.
hash_list <- discard(
  map_df(pdfs, pull_hashes),
  function(column) length(column) == 0
)

# Write the DOI/hash table to the working directory without row names.
write.csv(x = hash_list, file = "hash_list.csv", row.names = FALSE)
doi hash
10.1016/j.actao.2020.103617 Zlkjsntz.y.eJnMiGndePzcNNnteLlwj.n96GywySzgf_otiSzdmLo9ePmtyJot6SmdqTma
10.1016/j.foreco.2020.118845 tprDsndb.ngeNzduGmdj-osNNmd3-lweLndiGmM6Jmtv8otmQn.mPo9ePmMyQndeOod6Tma
10.1016/j.scitotenv.2021.145397 6mIUqzdiOzweKywuGzt6Pn8NNzt2Slwj9yMqGngf_ztaMmMyKmwyNo9ePmtqPmtyRmMiTma
10.1016/j.ecolmodel.2019.108856 HIoninwyOmgeOot6Gz96PyLNNmt-PlwiJyMaGotySnMmPogqRnMeOo9ePmMiNm9ePm9iTma
10.1016/j.ecolmodel.2021.109813 LMfnsztyMnMr8zd-GyPyKnsNNnM37lweLy9eGotr_mM3.zt-QyMuSo9ePndeLm96JndaTma
10.1016/j.ecoinf.2020.101178 LMfnsm.iRnM79nMeGyPz7mLNNot3.lt_.od2GztaQogf-mwf-nM-Qo9ePmtqQmdyJn9eTma
10.1016/j.foodres.2016.10.024 HIoniz9v9mPj.y9eGzt-RncNNnMb7lweSyt-Gytr8nPePndmJyteNo9eOot2Jn9qQntuTma
10.1016/j.ecolmodel.2021.109813 LMfnsztyMnMr8zd-GyPyKnsNNnM37lweLy9eGotr_mM3.zt-QyMuSo9ePndeLm96JndaTma
10.1016/j.foodchem.2018.03.042 zjJBNmtf.mgeMywyGndb8msNNngeKlweSmdaGz9-Kz.qKyM_9n9mQo9eOot2Pn96Nmd2Tma
10.1016/j.ecolmodel.2020.109286 HIoniogyKnM37mwyGm.iNnsNNmd2Slwj7n.mGztf8m.mQmPuJnPyRo9ePmdaQotySm9mTma
10.1016/j.foreco.2016.04.050 TGji2nguOngv.mMyGz.uQocNNmMiLlt7-y.uGn9z_mMz_mM__ywn_o9ePmtyJot6QnMiTma
10.1016/j.csda.2011.02.004 LMfnszgz_y9uMmtaGz9r-osNNzgmLlt-Kz.iGnMqKmtf8mMaLot6Oo9ePmd-Rn9yLndiTma
10.1016/j.tibs.2018.12.004 zjJBNy.eQytv.mteGot6Jz8NNzwyOlt6RnduGzgr9ntj9ztyNod79o9ePmdaRmdmOod2Tma
10.1016/j.ecolmodel.2019.03.011 DpairyM7_zweLyPiGz.r.yLNNmwyNlt6Oz9-Gn9z-ytmRngv_nweKo9ePmMiNm9eOm96Tma
10.1016/j.ejbt.2017.05.010 ZtV1wzgeKnMz.mduGy9n_ncNNnwmNlwf.zduGz937yM2Mzdf8nMv.o9ePmdaJod6Rnd-Tma
10.1016/j.ecolmodel.2019.108870 lXLf8yMuNn9eJntuGywqQm8NNod79lt_9ogqGn92LotmKmtuJyweNo9eOnM-Pmd-Jod6Tma
10.1016/j.ecolmodel.2019.03.007 DpairyMf9ot6OmdiGz9qRyLNNzwiJlweLn9-GmwyQmMqPnteRotb-o9ePm9aJntyJn9aTma
10.1016/j.aca.2015.02.012 D2ei2z9b_yteQmMuGnwf8ocNNm.yKlweRy9qGmt_.mtqLntj8m.iQo9ePmdaMntiNnM-Tma
10.1016/j.csda.2011.02.004 LMfnszgz_y9uMmtaGz9r-osNNzgmLlt-Kz.iGnMqKmtf8mMaLot6Oo9ePmd-Rn9yLndiTma
10.1016/j.tree.2016.07.002 w8arlzdn_mguMywyGzd38nsNNn.iJlweKotaGnM7_mMr9ot2OytiQo9ePm9qMmduLmdeTma
10.1016/j.csda.2011.02.004 tprDsztz.od2OywuGy9uRysNNmweLlt6Rzt2Got-MyM2PmdiOnd-Jo9ePm92NotiQndiTma
10.1016/j.tifs.2020.05.015 D2ei2mwv-ytuRzgyGndyNn8NNyt-Llt-SntuGmd-Sz9-Nmd2Sztf.o9ePmdyQntuJn9yTma
10.1016/j.ecolmodel.2021.109813 LMfnsztyMnMr8zd-GyPyKnsNNnM37lweLy9eGotr_mM3.zt-QyMuSo9ePndeLm96JndaTma
10.1016/j.biocon.2020.108948 Ml8ZFodmKz9aMnMeGnMyOnLNNmMmMlweSot-GzweLy.r8z9f8z9v-o9ePmtiRotmKmduTma
10.1016/j.arabjc.2017.05.011 lXLf8mMz-odj_nt6Gz9-MzsNNyMmRlt_-mdmGmt6KmMn.mgmQzwqLo9ePndmNmdiQodmTma
10.1016/j.ecolmodel.2019.108877 6U7vczdb-mdn9nMmGy9mNmsNNm9uLlt6SnMmGnMyNmMb_y9z_z9b-o9ePmMiNm9eNoduTma
10.1016/j.ecolmodel.2021.109815 lXLf8n9b9zd-Ln92GzdmOmLNNnwuRlwiKmgyGzwySn9mPmtr8mMiMo9ePndeLm92SmMqTma
10.1016/j.sajb.2017.03.010 6U7vcm9uMzgz8nPeGzgv.nLNNmt7.lweSodiGmd38yt3_zwv8z96Jo9ePmMeLmdiNmM-Tma
10.1016/j.phytochem.2020.112515 XlEDunduMm9_-n96Gzd3-ncNNnMeKlweQmtyGztr8ztz-mwf9z.qOo9ePmdeMmdqQmMaTma
10.1016/j.atmosres.2020.105111 w8arlyMaLz96Omd2Gy97_mLNNyM6Nlwf_otyGnPeSn.j7ytuOn.n-o9ePmt-Qmd6RodyTma
10.1016/j.ecoinf.2020.101136 Zlkjsn.mMnt2Rmd-Gngf7mcNNywmNlt6SotuGzdmJmtv7yMuKodmJo9eOot-OotuOndmTma
10.1016/j.tree.2021.10.009 X9O3Czgz.ytr-ndyGnwj7ysNNm9qPlt79otuGmtqMy9eMywz-md__o9ePmM2KnM2RmdqTma
10.1016/j.tree.2016.07.002 w8arlzdn_mguMywyGzd38nsNNn.iJlweKotaGnM7_mMr9ot2OytiQo9ePm9qMmduLmdeTma
10.1016/j.arabjc.2017.05.011 zjJBNyPv9mwqRnPuGnd6RncNNnd37lt6Sn9-Gn9eOmdaPywyJm9b8o9ePndmNmdiSodmTma
10.1016/j.tree.2011.09.008 Dpairown-mgeLn9mGyPn.msNNod6LlwiNotqGowuLngeQnweMmwmLo9ePmtqPmMyOntiTma
10.1016/j.tree.2021.11.003 X9O3CyMmMotmMzdeGnPeSnsNNmgyRlt-Qy96GmweLmM6Motf8ytuOo9ePmM6Jn96Nm9aTma
10.1016/j.biopha.2017.09.024 6U7vcoteOndeOotaGm9iMm8NNmPn9lt-Ly9uGn9-QyMz-ztyQmPyPo9ePmMuRmdeSmdyTma
10.1016/j.envexpbot.2021.104557 TGji2yMmQnM2On92GoteJm8NNz96OlwiNz92Gz.n7nPqNmgv7m9_9o9ePm9qLodaMndyTma
10.1016/j.biocon.2020.108520 zjJBNy9r.yPj8otuGod-LmLNNndeOlt-LmMyGodr_z.yQnM2Smwn.o9ePmtiRodiSod2Tma
10.1016/j.jinsphys.2012.06.008 eMmMVyMyLmduQz9-GyMyPocNNmMn7lt-Nn9uGmwn8mt__y.ePnMuSo9eOn96KntqKoty
10.1016/j.ecoinf.2020.101178 ZlkjsnwuKy9uOz9mGngf7zsNNnMyJlt_-mM6GndeKnd6KzgiSy9_-o9ePmdyRntiQnd2Tma
10.1016/j.econedurev.2010.06.010 D2ei2yMv_odj.yM2Gy.iRn8NNodn.lt_8yMuGngiJn.eNm9mNodiNo9eOotqPndyNodiTma
10.1016/j.ecoinf.2021.101251 eMmMVnPeLmdiJy.mGyMuLz8NNz.qJlwf7nt2GmtiKy97-otn8nd6Po9ePmtuMmtqKnMeTma
10.1016/j.envexpbot.2020.104001 TGji2zt2MyMmJoteGod-SnLNNz.z_lt6KztuGz9f.nPyPy.mMzdmJo9ePmduJmt-NodiTma
10.1016/j.csda.2011.02.004 LMfnszgz_y9uMmtaGz9r-osNNzgmLlt-Kz.iGnMqKmtf8mMaLot6Oo9ePmd-Rn9yLndiTma
10.1016/j.actao.2014.05.009 zjJBNywj_mMj_n9eGn9qPzcNNm9_-lt7.ntqGnPyOywiQn.j7mPf-o9ePmt-KmduOndaTma
10.1016/j.foreco.2020.118845 tprDsndb.ngeNzduGmdj-osNNmd3-lweLndiGmM6Jmtv8otmQn.mPo9ePmMyQndeOod6Tma
10.1016/j.ecoinf.2021.101235 HIonim9z.mtz9n92GyMuNnLNNzdn8lt-Pm9mGm.qOn.yOmPqMz.r8o9ePmtqKmdqJntyTma
10.1016/j.cois.2016.07.004 zjJBNzdmJywiPnMeGnMmSosNNm9j7lt6Rnt-GzgeQnPyPot6Lmtf7o9ePmd6Jn9aSodeTma
10.1016/j.ecolmodel.2019.108797 DpairntyLn.iQm.qGztiJncNNyPqMlt-PytmGmdv_ztyKmwiPmd6Po9ePmMiNm9eOoduTma
10.1016/j.econedurev.2010.07.009 D2ei2mtf-mPf-mMuGy9-LmLNNyt6Rlt6JnteGy.n7mwmRowj.odr_o9eOoteQmMqPn9mTma
10.1016/j.envexpbot.2020.104001 TGji2zt2MyMmJoteGod-SnLNNz.z_lt6KztuGz9f.nPyPy.mMzdmJo9ePmduJmt-NodiTma
10.1016/j.arabjc.2017.05.011 d2UUdndiSmdeLmMuGzt6JmsNNzdeOlt-SmwuGy96Jzgz7zwqLmduJo9ePndmNmdiRm9iTma
10.1016/j.foodres.2020.109347 XlEDungr7mdqRogyGztf.nsNNm.uRlwiPnduGnwj7ytr.y9mMnteJo9eOotqKntiOod2Tma
10.1016/j.jinsphys.2012.06.008 eMmMVyMyLmduQz9-GyMyPocNNmMn7lt-Nn9uGmwn8mt__y.ePnMuSo9eOn96KntqKoty
10.1016/j.ecolmodel.2021.109457 d2UUdnd79nPf-mwyGndz8zsNNmMz9lt-LytmGmd-PoduKmd6LnPyLo9ePmtuMmtqMnteTma
@Aariq
Copy link
Author

Aariq commented Jan 31, 2022

There's definitely some pattern here: most of them end in "Tma", and just one (which appears twice in the list) ends in "Koty". Repeated beginnings include DpairyM, HIoni, LMfns, and D2ei2.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment