Skip to content

Instantly share code, notes, and snippets.

@ChlodAlejandro
Last active January 29, 2024 17:30
Show Gist options
  • Save ChlodAlejandro/fa7ab57bd53355b436455d44f8381891 to your computer and use it in GitHub Desktop.
Save ChlodAlejandro/fa7ab57bd53355b436455d44f8381891 to your computer and use it in GitHub Desktop.
CNN Philippines externallinks archive status report for English Wikipedia
// Save the result of https://quarry.wmcloud.org/query/80025 as CSV and
// place it in the working directory as "masterlist.csv" before running.
import * as fs from "fs";
import fetch from "node-fetch"
// Origin against which the entries of the master list are resolved.
const BASE_URL = "https://cnnphilippines.com";

// One entry per line of the master list. The first line is dropped
// (presumably the CSV header row), and blank lines, such as the one left by a
// trailing newline, are discarded so that `data.length` is the real total.
const data = fs.readFileSync("masterlist.csv")
    .toString("utf8")
    .split("\n")
    .slice(1)
    .filter((line) => line.trim() !== "");

// Remove the output of a previous run. These must be the same file names that
// the checker appends to, otherwise old results pile up across runs.
for (const staleFile of ["archived.txt", "not-archived.txt", "checked.txt"]) {
    if (fs.existsSync(staleFile)) {
        fs.unlinkSync(staleFile);
    }
}
/**
 * Asks the Wayback Machine availability API whether `url` has a snapshot and
 * records the outcome: the normalized URL is appended to "checked.txt" and to
 * either "archived.txt" or "not-archived.txt".
 *
 * @param {number|string} i - Position of this entry, used only in log lines.
 * @param {URL} url - URL to check. Mutated in place: a text-fragment hash and
 *   an `fbclid` query parameter are stripped before the lookup.
 * @param {number} [retryDelayMs=30000] - How long to wait before retrying
 *   after an HTTP 429 response.
 * @returns {Promise<void>} Resolves once the result has been written.
 * @throws {Error} If the API answers with a non-429 error status or with a
 *   body that has no `archived_snapshots` object.
 */
async function get(i, url, retryDelayMs = 30e3) {
    // Text fragments only describe a highlight within the page.
    if (url.hash.startsWith("#:~:text")) {
        url.hash = "";
    }
    // Only touch the query when `fbclid` is present: modifying searchParams
    // re-serializes the whole query string, which could alter other URLs.
    if (url.searchParams.has("fbclid")) {
        url.searchParams.delete("fbclid");
    }

    const actualURL = url.toString();
    const progress = `(${i}/${data.length})`;
    const endpoint =
        `https://archive.org/wayback/available?url=${encodeURIComponent(actualURL)}`;

    console.log(`${progress} Checking: ${actualURL}`);

    let response = await fetch(endpoint);
    // Rate limited: back off and ask again until a real answer arrives.
    while (response.status === 429) {
        console.warn(`${progress} Rate limited, retrying in ${retryDelayMs} ms: ${actualURL}`);
        await new Promise((resolve) => { setTimeout(resolve, retryDelayMs); });
        response = await fetch(endpoint);
    }

    if (!response.ok) {
        throw new Error(
            `Availability lookup failed with HTTP ${response.status} for ${actualURL}`
        );
    }

    const body = await response.json();
    const snapshots = body == null ? undefined : body.archived_snapshots;
    if (snapshots == null || typeof snapshots !== "object") {
        throw new Error(`Unexpected availability response for ${actualURL}`);
    }

    fs.appendFileSync("checked.txt", actualURL + "\n");
    // An empty `archived_snapshots` object means no snapshot was reported.
    if (Object.keys(snapshots).length === 0) {
        console.warn(`${progress} Not archived: ${actualURL}`);
        fs.appendFileSync("not-archived.txt", actualURL + "\n");
    } else {
        console.log(`${progress} Archived: ${actualURL}`);
        fs.appendFileSync("archived.txt", actualURL + "\n");
    }
}
// Entry point: checks every entry of the master list, one at a time.
(async () => {
    // entries() yields numeric indices. A for...in loop yields string keys,
    // which made `i + 1` concatenate ("01", "11", ...) instead of counting.
    for (const [index, line] of data.entries()) {
        let rawUrl = line.trim();
        // A blank line would otherwise resolve to the bare site root.
        if (rawUrl === "") {
            continue;
        }
        // Strip the quotes that wrap a quoted CSV field.
        if (rawUrl.startsWith('"') && rawUrl.endsWith('"')) {
            rawUrl = rawUrl.replace(/^"(.+)"$/g, "$1");
        }
        // Entries may be site-relative paths; resolve them against the site origin.
        const url = new URL(rawUrl, BASE_URL);
        await get(index + 1, url);
    }
})().catch((err) => {
    // Report the failure and signal it through the exit code instead of
    // leaving an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
});
/entertainment/2017/01/05/MMFF-2017-box-office-ranking.html
/entertainment/2017/04/05/Coldplay-Manila-Philippines-concert-Ken-Santiago-Ink-song.html
/entertainment/2019/02/11/armida-siguion-reyna-death.html
/lifestyle/2015/01/29/Pill-lets-you-get-buzzed-no-hangover.html
/metro/2015/05/07/metro-manila-police-use-e-blotter-e-cirs.html
/news/2015/10/29/Jejomar-Binay-Camarines-Sur-Luis-Villafuerte-Arnulfo-Fuentebella-2016-elections.html
/news/2015/12/22/comelec-en-banc-cancles-grace-poes-coc-for-presidency.html
/news/2016/04/21/Duterte-lashes-envoys-womens-group.html
/news/2016/05/19/Bangladesh-PH-relations-money-laundering-scandal.html
/news/2016/12/22/journalists-probe-Catanduanes-publisher-killing.html
/news/2017/04/08/earthquakes-hit-batangas-felt-in-other-luzon-areas.html
/news/2017/06/12/bill-to-rename-PH-by-gary-alejano.html
/news/2017/07/22/Martial-law-Mindanao-Marawi-human-rights-violations.html
/news/2017/10/20/Marawi-hostage-rescue-Maute.html
/news/2018/04/13/mocha-uson-editing-war-vandalism.html
/news/2018/05/22/associate-justice-antonio-carpio-vice-president-leni-robredo-protest-china-bomber-planes-south-china-sea.html
/news/2018/06/19/Supreme-Court-ill-gotten-wealth-PCGG-Ferdinand-Marcos-cronies.html
/news/2018/10/21/nine-killed-negros-occidental-sugar-plantation.html
/regional/2016/06/24/Abu-Sayyaf-frees-Filipina-hostage.html
/sports/2015/05/05/albay-2016-palarong-pambansa-prisaa.html
/sports/2016/04/30/la-salle-ateneo-uaap-volleyball-championship.html
/sports/2018/08/15/Asian-Games-Jordan-Clarkson-PH-flag-bearer.html
/videos/2016/01/26/The-51st-International-Eucharistic-Congress.html
/videos/2016/03/28/Comelec-site-defaced.html
/videos/2016/08/04/Kodo-Nakano-is-13th-Filipino-Olympian.html
/videos/2017/10/12/The-Boardroom-Align-Commerce.html
/sports/2015/11/22/UAAP-Season-78-Final-Four-NU-UST-Vigil-Ferrer.html
/.imaging/mte/demo-cnn-new/750x450/dam/cnn/2017/3/13/Arman-Balilo-The-Source_CNNPH.jpg/jcr:content/Arman-Balilo-The-Source_CNNPH.jpg
/business/2020/2/3/DICT-cash-advance-.html
/entertainment/2019/12/11/U2-filipino-american-poet-collaboration.html
/entertainment/2019/7/10/miss-earth-philippines-2019.html
/entertainment/2019/9/16/pista-ng-pelikulang-filipino-2019-lola-igna.html
/entertainment/2021/11/16/heber-bartolome-death.html
/life/culture/2018/10/11/pre-colonial-philippines.html
/news/2016/02/04/Abaya-DOTC-officials-anti-graft-law-MRT-3.html
/news/2017/03/07/Aquino-cleared-Abad-charged
/news/2018/01/19/Constitution-framer-on-cha-cha.html
/news/2019/11/28/cayetano-2022.html
/news/2019/11/30/LOOK--Sports-legends-carry-SEA-Games-Federation-flag.html
/news/2019/9/22/maldives-fire-dfa.html
/news/2019/9/27/PMA-cadet-Darwin-Dormitorio-hazing-allowance-spending-letter.html
/news/2020/12/28/PSG-military-first-COVID-vaccination-protect-Duterte.html
/news/2020/9/27/Cayetano-Paolo-Duterte-Davao-City.html
/news/2021/2/19/mrt-7-more-than-50-percent-complete-set-to-open-dec-2022.html
/news/2021/4/9/Chinese-vessels-chase-Filipino-crew-West-Philippine-Sea.html
/news/2021/8/28/COVID-19-new-all-time-high-19000.html
/news/2022/2/9/Moreno-Ong-proclamation-rally.html
/seagames/2019/11/30/2019-sea-games-opening-ceremony-.html
/sports/2015/04/07/pba-semifinals-rain-or-shine-finals-berth.html
/sports/2021/5/22/Kurt-Barbosa-qualifies-for-Tokyo-Olympics.html
/sports/2022/2/9/Asa-Miller-credits-father-for-skiing-influence.html
/videos/2020/12/22/OCTA--COVID-19-cases-sa-NCR-patuloy-ang-pagtaas.html
/videos/2021/1/19/Remembering-1995-World-Youth-Day-through-music.html
/videos/2021/2/11/CNN-Philippines-to-launch-first-ever-podcast--Suddenly-Family-.html
/business/2020/5/2/pogo-not-bpo-.html
/business/2023/3/29/doj-junks-tax-evasion-case-joseph-calata-agri-phil-corp.html
/entertainment/2022/11/27/SB19-announces-new-album-in-the-works-larger-world-tour-in-2023.html
/entertainment/2022/4/11/2022-PPOPCON.html
/entertainment/2022/6/26/Philippines-Miss-International-Queen-2022-.html
/life/culture/politics/2019/3/5/conjugal-dictatorship.html
/life/entertainment/Film/2020/2/26/jay-altarejos-interview.html
/news/2017/01/11/Duterte-EO-supporting-family-planning.html
/news/2017/03/06/duterte-approves-national-broadband-program.html
/news/2018/05/30/duterte-fires-customs-deputy-commissioner-noel-patrick-sales-prudente.html
/news/2019/11/15/japanese-organized-crime-group-makati-.html
/news/2019/12/31/Typhoon-Ursula-death-toll-50/
/news/2019/2/27/cadiz-city-negros-occidental-priest-rape-charge-molestation-of-4-year-old-girl.html
/news/2019/5/22/Comelec-proclamation-senators-party-list-groups.htmlgoXPS1Gx7LI
/news/2019/8/5/William-Dar-Agriculture-Secretary.html
/news/2020/10/9/Ricardo-Rosario-Supreme-Court-appointment.html
/news/2020/3/3/filipino-muslim-cemetery-in-manila-.html
/news/2020/4/12/DOLE-COVID-quarantine-1-million-displaced-workers.html
/news/2020/6/3/DOJ-Gabby-Lopez-Filipino-citizen.html
/news/2021/10/31/NPA-leader-Ka-Oris-death-AFP.html
/news/2021/12/4/Comelec-rejects-107-party-list-groups-with-finality.html
/news/2022/1/22/Lacson-denies-involvement-in-Martial-Law-abuses--evading-murder-cases.html
/news/2022/10/4/Calida-resigns-as-COA-chairman.html
/news/2022/3/10/Bongbong-Marcos-Abra-Build.html
/news/2022/3/8/SC-issues-TRO-Operation-Baklas.html
/news/2022/5/30/DPWH-chief-Bonoan-Build-Build-Build-traffic-woes.html
/news/2022/7/30/new-dhsud-secretary-acuzar.html
/news/2022/9/25/trains-suspend-operations-due-to-Karding.html
/news/2023/11/27/labor-groups-bonifacio-day.html
/news/2023/12/21/KAPA-masterminds-sentenced-to-life-in-prison.html
/news/2023/7/11/MTRCB-allows-Barbie-screening-in-PH.html
/news/2023/8/20/lrt1-fernando-poe-jr-station.html
/regional/2016/02/19/Former-N.-Cotabato-town-mayor-shot-dead-broad-daylight.html
/regional/2022/8/10/tarsier-presence-recorded-Tacloban-City.html
/sports/2020/6/10/Duterte-signs-National-Academy-of-Sports-Act.html
/sports/2022/3/3/Drex-Zamboanga-eyes-former-champ-Christian-Lee.html
/sports/2022/4/19/United-City-suffers-second-straight-loss.html
/sports/2023/9/19/Gilas-replacements-Asian-Games.html
/videos/2023/9/25/About-8-000-cadets-participate-in-first-ROTC-Games.html
/entertainment/2019/3/25/kathryn-bernardo-kids-choice-award-2019.html,
/sports/2018/12/19/Magnolia-Hotshots-2018-PBA-Governors-Cup-champions-Alaska-Game-6.html
/sports/2019/4/10/PBA--Magnolia-prevails-in-Manila-Clasico-series,-advances-to-semis.html
{
"dependencies": {
"node-fetch": "^3.3.2"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment