Emilio Velis (dubsnipe)

dubsnipe / titles.R
Created November 11, 2020 04:09
Extract titles from a list of URLs
require(tidyverse)
require(rvest)

# Read the list of URLs from a CSV file
toolbox <- as_tibble(read.csv("toolbox.csv", header=T))

# Fetch a page and return the text of its <title> element
read_title <- function(x){
  tmp <- read_html(x) %>% html_node("title") %>% html_text()
  return(tmp)
}
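A hedged usage sketch; the CSV's column name is an assumption, since the preview does not show it:

# Assumes toolbox.csv has a column named "url"; adjust to the real name.
toolbox$title <- sapply(toolbox$url, function(u) {
  tryCatch(read_title(u), error = function(e) NA_character_)
})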
require(xml2, quietly=T)  # read_xml() comes from xml2
require(rvest, quietly=T)
require(dplyr, quietly=T)
require(scholar, quietly=T)

# Page numbers from which results are drawn; fetching in batches
# avoids HTTP error 429 (Too Many Requests).
index <- seq(from=81, to=90)

# Combining XML documents, per:
# https://stackoverflow.com/questions/43461907/in-r-how-do-i-combine-two-xml-documents-into-one-document
xml0 <- read_xml("<html></html>")  # empty document to accumulate results
for(i in index){
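The preview cuts off inside the loop. A minimal sketch of how the body might continue, assuming each iteration fetches one page of results and appends its nodes to xml0 in the spirit of the linked Stack Overflow answer; the URL below is a placeholder, not the gist's real target:

  # Placeholder URL: the real results URL is not shown in the preview.
  page <- read_html(paste0("https://example.org/results?page=", i))
  # Append the fetched page's top-level nodes to the accumulator document.
  for (node in xml_children(page)) {
    xml_add_child(xml0, node)
  }
  Sys.sleep(5)  # pause between requests to stay under the 429 rate limit
}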
require(yaml)  # provides read_yaml()

# Build MediaWiki {{Infobox device}} markup from a YAML manifest hosted on GitHub
github2mediawiki <- function(manifest_url) {
  okh_tmp <- tempfile()
  download.file(paste0(manifest_url, "?raw=true"), okh_tmp)
  manifest <- read_yaml(okh_tmp)
  manifest_text <- paste0(
    "{{Infobox device\n",
    "|image=", manifest$image, "<!-- Please consider uploading image to Appropedia. -->\n",
    "|description=", manifest$description, "\n",
    "|keywords=", manifest$keywords, "\n",
dubsnipe / extract_infobox.R
Created April 10, 2021 00:34
Script to extract infoboxes from Appropedia dumps
require(xml2, quietly=T)
require(tidyverse, quietly=T)
require(lubridate, quietly=T)
require(tidytext, quietly=T)
require(stringr, quietly=T)

# Read a MediaWiki XML dump and convert it to a nested list
pages <- read_xml("Appropedia-20210409194434.xml")
pages_list <- as_list(pages)
pages_tibble <- as_tibble(
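The preview truncates inside the as_tibble() call. A sketch of how the conversion might continue, assuming the standard MediaWiki dump layout (<page> elements carrying <title> and <revision><text>); the element paths and the infobox filter are assumptions:

  # Hypothetical continuation: drop <siteinfo> (the first child), then pull
  # each page's title and wikitext out of the nested list.
  data.frame(
    title = sapply(pages_list$mediawiki[-1], function(p) unlist(p$title)),
    text  = sapply(pages_list$mediawiki[-1], function(p) unlist(p$revision$text))
  )
)

# Keep only pages whose wikitext contains an infobox template
infobox_pages <- pages_tibble %>% filter(str_detect(text, fixed("{{Infobox")))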
<?php
set_time_limit(0);

// Read newline-separated page titles from the POSTed form field
$pages = $_POST["the_text"];
$pages_separated = explode(PHP_EOL, $pages);

// Normalize to MediaWiki-style titles: spaces become underscores,
// then any leading or trailing underscore is stripped.
$pages_separated = preg_replace("/\s/", "_", $pages_separated);
$pages_separated = preg_replace("/^_/", "", $pages_separated);
$pages_separated = preg_replace("/_$/", "", $pages_separated);
//echo json_encode($pages_separated)."\n";
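For reference alongside the R gists, the same normalization sketched in R (not part of the original gist):

# Convert newline-separated titles to MediaWiki form: spaces to underscores,
# then trim any leading or trailing underscore.
normalize_titles <- function(pages) {
  titles <- unlist(strsplit(pages, "\n"))
  titles <- gsub("\\s", "_", titles)
  gsub("^_|_$", "", titles)
}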
dubsnipe / zoom_attendance.R
Last active January 22, 2023 21:01
Script used to calculate students' class attendance over Zoom.
require(tidyverse)
require(lubridate)

setwd("")  # path to the folder of Zoom attendance reports (left blank in the gist)
files <- Sys.glob("*.csv")

# Each Zoom report opens with a two-row meeting summary; grab the meeting Topic
topics <- lapply(files, read.csv, nrows=2, header=T, encoding="UTF-8")
topics <- lapply(topics, function(x) x["Topic"][1,]) %>% unlist()

# The per-participant records start after the summary rows
ldf <- lapply(files, read.csv, skip=3, header=T, encoding="UTF-8")
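A sketch of a plausible next step, assuming the goal is one long table keyed by meeting topic; the column names below come from Zoom's standard export and are assumptions here:

# Tag each report with its meeting topic and stack them into one table.
names(ldf) <- topics
attendance <- bind_rows(ldf, .id = "topic")
# Total minutes per student per meeting; "Name..Original.Name." and
# "Duration..Minutes." are Zoom's default export headers as mangled by read.csv.
per_student <- attendance %>%
  group_by(topic, Name..Original.Name.) %>%
  summarise(minutes = sum(Duration..Minutes.), .groups = "drop")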
// ==UserScript==
// @name Wikimedia Commons Attribution
// @namespace http://emiliovelis.com
// @version 0.1
// @description This script builds the markup needed to embed Wikimedia Commons images, including attribution information.
// @require https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.8/clipboard.min.js
// @match https://*.wikimedia.org/*
// ==/UserScript==
dubsnipe / rss_downloader.R
Last active January 22, 2023 22:37
R script to download MP3 files from my favorite podcasts
require(dplyr)
require(stringr)
require(xml2)
require(lubridate)

## Script created with help from ChatGPT
rss_feed <- ""  # feed URL (left blank in the gist)

find_all_mp3 <- function(rss_feed){
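The preview stops at the function's opening line. A minimal sketch of a body consistent with the imports above, assuming the MP3 URLs live in the feed's <enclosure> tags (standard for podcast RSS):

  # Parse the feed and collect the url attribute of every <enclosure> tag.
  feed <- read_xml(rss_feed)
  enclosures <- xml_find_all(feed, "//enclosure")
  urls <- xml_attr(enclosures, "url")
  # Keep only MP3 links.
  urls[str_detect(urls, "\\.mp3")]
}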
require(httr)
require(jsonlite)
require(dplyr)

# Fetch one batch of page titles from the Appropedia MediaWiki API,
# starting from the title given in apfrom.
query_titles <- function(apfrom = ""){
  api_url <- paste0("https://www.appropedia.org/w/api.php?action=query&list=allpages&aplimit=500&format=json&apfrom=", apfrom)
  res <- GET(api_url)
  data <- fromJSON(rawToChar(res$content))
  # Keep main-namespace pages (ns == 0) and return just their titles.
  data_tibble <- as_tibble(data$query$allpages) %>% filter(ns == 0) %>% select(title)
  return(data_tibble)
}
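A hedged sketch of driving the pagination, assuming batches are requested until fewer than the 500-title limit come back; the preview does not show the calling loop, and MediaWiki's apcontinue token would be the more robust cursor:

# Page through all titles by restarting each request at the last title seen.
all_titles <- query_titles()
repeat {
  last <- tail(all_titles$title, 1)
  batch <- query_titles(apfrom = utils::URLencode(last))
  all_titles <- bind_rows(all_titles, batch) %>% distinct()
  if (nrow(batch) < 500) break
}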
setwd("C:/Users/rudes/Documents/2023/appropedia/data dump")
require(jsonlite)
require(data.table)
require(tidyverse)
base_url <- "https://www.appropedia.org/w/api.php?action=query&list=allpages&aplimit=500&format=json&apfrom="
to_df <- function(apfrom="\"Backpack\"_Snap-fit_Clips"){
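The preview ends at the function's opening line. A minimal sketch of a body consistent with the query_titles() gist above (an assumption; the real body is not shown):

  # Hypothetical body, mirroring query_titles(): fetch one batch and flatten it.
  data <- fromJSON(paste0(base_url, utils::URLencode(apfrom)))
  as.data.frame(data$query$allpages)
}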