Skip to content

Instantly share code, notes, and snippets.

View agricolamz's full-sized avatar

George Moroz agricolamz

View GitHub Profile
library(tidyverse)
library(scholar)
# Profile identifier handed to the scholar package calls below
# (presumably a Google Scholar user ID -- TODO confirm).
my_id <- "ka_iMFQAAAAJ"
# Retrieve the publication table for that profile; its `pubid` column is
# what the following map_chr() call iterates over.
my_pubs <- get_publications(my_id)
map_chr(my_pubs$pubid, function(i){
res <- get_publication_abstract(id = my_id, pub_id = i)
ifelse(length(res) > 0, res, "")
library(tidyverse)
library(bib2df)
# Convert the BibTeX file to a TSV. Working row by row, the AUTHOR and
# EDITOR values of each entry are collapsed into one " and "-separated
# string (presumably they arrive as list-columns of names -- verify against
# the bib2df output) so that they fit into a flat text table.
bib2df("verbal_negation.bib") |>
  rowwise() |>
  mutate(AUTHOR = str_c(AUTHOR, collapse = " and ")) |>
  mutate(EDITOR = str_c(EDITOR, collapse = " and ")) |>
  write_tsv("verbal_negation.tsv")
bib2df("verbal_negation_spec.bib") |>
rowwise() |>
library(tidyverse)
# Build `revizor`: one row per retained line of "revizor.txt".
#   text -- the line with surrounding/repeated whitespace squished
#   id   -- running integer index over the retained lines
# Lines starting with "ДЕЙСТВИЕ" or "Явление" (heading lines) and empty lines
# are removed before numbering.
revizor <- read_lines("revizor.txt") |>
  str_squish() |>
  tibble(text = _) |>
  filter(
    !str_detect(text, "^ДЕЙСТВИЕ"),
    !str_detect(text, "^Явление"),
    text != ""
  ) |>
  # row_number() rather than 1:n(): when nothing survives the filter,
  # 1:n() evaluates to c(1, 0) and mutate() stops with a size mismatch.
  mutate(id = row_number())
library(tidyverse)
library(phonfieldwork)
files <- list.files(pattern = "TextGrid")
walk(files, function(file){
textgrid <- textgrid_to_df(file)
textgrid |>
mutate(content = str_extract(content, "^.*?-"),
content = str_remove(content, "-"),
content = str_replace_all(content, "SS", "ss"),
library(tidyverse)
t <- pdftools::pdf_ocr_text("Khan 2008 Jewish Neo-Aramaic Dialect of Urmi-465-497.pdf")
tibble(text = str_split(t, "\n\n") |> unlist()) |>
filter(!str_detect(text, "GLOSSARY OF VERBS"),
nchar(text) > 4) |>
slice(-c(1:2)) |>
mutate(verb = str_extract(text, "\\S{1,}\\s"),
verb = str_squish(verb),
text = str_remove_all(text, "\n")) |>
speech_to_text <- function(audio,
output_name = "output",
model_path = "ggml-large-v3.bin"){
library(tidyverse)
library(audio.whisper)
# convert to the format specs ---------------------------------------------
tmp <- tempdir()
str_glue("ffmpeg -i {audio} -ar 16000 -ac 1 -c:a pcm_s16le {tmp}/{output_name}.wav") |>
system()
library(stringi)
coresp <- "
ῶι > ῷ;
ωι > ῳ;
ὧι > ᾧ;
ὦι > ᾦ;
ηι > ῃ;
ῆι > ῇ;
ἦι > ᾖ;
library(tidyverse)
# Frequency table of the `dedication` column, most frequent value first.
# The result is not assigned, so it is printed when the script is run
# at top level.
count(
  read_csv("russian_libraries_subdataset.csv"),
  dedication,
  sort = TRUE
)
library(tidyverse)
# Read the table with read_csv2() (readr's reader for ";"-separated files).
df <- read_csv2("data.csv")

# Print the case labels; this column is dropped before transposing.
df$CASE_LBL

# Transpose everything except CASE_LBL into `transposed`.
# t() on a tibble returns a matrix without column names. Calling as_tibble()
# directly on such a matrix relies on tibble's deprecated compatibility name
# repair and emits a warning. Going through as.data.frame() first yields the
# same V1, V2, ... column names without that deprecated path; as_tibble()
# then drops the row names, exactly as before.
transposed <- df |>
  select(-CASE_LBL) |>
  t() |>
  as.data.frame() |>
  as_tibble()
library(tidyverse)
library(ggiraph)
df <- read_csv("russian_journals_vectorized.csv")
df |>
mutate(for_tooltip = str_c(journal_title, "<br><br>",
author, " (",
year, ") ",
title, "<br><br>",
annotation)) |>