Last active
July 21, 2022 08:41
-
-
Save knbknb/a8c9fa96c13259e1122edac4ca5ca840 to your computer and use it in GitHub Desktop.
BigQuery snippets (for command line tool bq)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
--## a longer and more complex query: | |
--## 6 tags on stackoverflow.com: | |
--## monthly posts by tagcount per month, since 2008 | |
*/ | |
with cte as ( | |
select | |
t.tag_name, | |
LAST_DAY(date(creation_date)) as month | |
from | |
`bigquery-public-data.stackoverflow.posts_questions` p | |
join `bigquery-public-data.stackoverflow.tags` t on t.tag_name = p.Tags | |
where | |
Tag_Name in ( | |
/* 'r', | |
'python', | |
'javascript', | |
'java', | |
'php',*/ | |
'svg', 'xslt', 'xml', 'xpath', 'xsl', 'xsd', 'xml-schema', | |
'json', 'json-schema', 'json-ld', 'json-ld-schema' | |
) | |
) | |
select | |
*, | |
count(*) as tag_cnt | |
from | |
cte | |
group by | |
tag_name, | |
month | |
order by | |
Month asc, | |
tag_name asc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# bigquery-bq-commands--collection.sh | |
# knb 2022-07 | |
# Tagcount by month on stackoverflow.com | |
# call: ./bigquery-bq-commands--collection.sh -t r,c,c++ | |
# edit commands as needed. | |
function query { | |
local tags=$(perl -E "say join q(,), map {qq('\$_')} split(',', qq($1))") | |
cd /var/tmp; | |
echo tags=$tags | |
# url_gh="https://gist.githubusercontent.com/knbknb" | |
# # Attention: gist fragment cah change with every save! | |
# gist="a8c9fa96c13259e1122edac4ca5ca840/raw/3ab980dcca392cf958e0ea28e359c49b01e1ee67/" | |
# sql="bigquery--stackoverflow-tagcount-per-month.sql" | |
# tagcount_by_month=$(curl -sL "${url_gh}${gist}${sql}") | |
# url_sql="${url_gh}${gist}${sql}" | |
# 'clojure', 'scala', 'haskell', 'erlang', 'common-lisp' | |
# tagcount_by_month=$(curl -sL "${url_sql}") | |
tagcount_by_month=$(cat <<EOD | |
with cte as ( | |
select | |
t.tag_name, | |
LAST_DAY(date(creation_date)) as month | |
from | |
bigquery-public-data.stackoverflow.posts_questions p | |
join bigquery-public-data.stackoverflow.tags t on t.tag_name = p.Tags | |
where | |
Tag_Name in ( | |
$tags | |
) | |
) | |
select | |
tag_name, | |
month, | |
count(*) as tag_cnt | |
from | |
cte | |
group by | |
tag_name, | |
month | |
order by | |
Month asc, | |
tag_name asc | |
EOD | |
) | |
echo "${tagcount_by_month}" | |
# various important customizations are here: $HOME/.bigqueryrc | |
# execute query | |
datafile_csv=bigquery_resultdata.csv | |
# if [ -f "${datafile_csv}" ]; then | |
# perl -E "say 'x' x 50;" | |
# echo "${datafile_csv} already exists, skipping query" | |
# perl -E "say 'x' x 50;" | |
# else | |
# echo "$tagcount_by_month" | bq query > ${datafile_csv} | |
# fi | |
query="bigquery--stackoverflow-tagcount-per-month" | |
echo "$tagcount_by_month" | bq query > ${datafile_csv} | |
outfile_svg="${query}.svg" | |
if [ -f "${datafile_csv}" ]; then | |
if [ ! -s "${datafile_csv}" ]; then | |
echo rm "${datafile_csv}" | |
rm "${datafile_csv}" | |
else | |
r_code=$(cat <<EOR | |
theme_set(theme_light()); | |
image <-df %>% | |
mutate(month=as.Date(month)) %>% | |
mutate_if(is.character, as.factor) %>% | |
ggplot() + | |
geom_line(aes(x=month,y=tag_cnt,group=tag_name,color=tag_name)) + | |
scale_x_date(date_breaks = '3 months' , date_labels = '%b-%y') + | |
theme(axis.text.x = element_text(angle=90)) + | |
labs(title='Tags by month on stackoverflow.com', | |
subtitle=sprintf('2008 - %s', lubridate::year(lubridate::now()))); | |
ggsave(filename='$outfile_svg', plot=image, width=10, height=8) | |
EOR | |
) | |
# generate the plot and save to disk, as SVG | |
< ${datafile_csv} Rio -r -d, -g -e "$r_code" | |
fi | |
fi | |
if [ -f "${outfile_svg}" ]; then | |
if [ "${outfile_svg}" -ef ".svg" ]; then | |
echo "'${outfile_svg}' -ef '.svg'" | |
else | |
nohup google-chrome ${outfile_svg} & | |
fi | |
else | |
echo "no svg file created, '${outfile_svg}' is bad or empty" | |
fi | |
} | |
OPTIONS=`getopt -o t: --long tags: -n 'bigquery-bq-commands--collection.sh' -- $@` | |
eval set -- "$OPTIONS" | |
while true; do | |
case $1 in | |
-t|--tags) | |
case $2 in | |
([a-zA-Z,]*) query $2; shift 2;; | |
*) >&2 echo Please provide a string of SO tags in the form of "r,python,perl"; shift 2;; | |
esac;; | |
--) shift; break;; | |
*) >&2 echo "[Error] The given command line argument is invalid, please try again"; exit 1;; | |
esac | |
done | |
exit 0; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# see Question on SO | |
# https://stackoverflow.com/questions/43195143/is-there-a-way-to-export-a-bigquery-tables-schema-as-json | |
# | |
bq show --format prettyjson bigquery-public-data:samples.wikipedia | |
# | |
bq show --format prettyjson bigquery-public-data:samples.natality | |
# only column defs, with jq | |
bq show --format=prettyjson bigquery-public-data:samples.wikipedia | jq '.schema.fields' | |
# same thing, no need for jq | |
bq show --schema --format=prettyjson bigquery-public-data:samples.natality | |
# 2-column output, just name and description, e.g. for data dictionaries | |
bq show --schema --format=prettyjson bigquery-public-data:samples.natality | jq ".[] | {(.name?): .description}" | sort -u | |
# similar, but just 1 string per line | |
bq show --schema --format=prettyjson bigquery-public-data:samples.natality | jq '.. | (.name?) + " : " + (.description?) ' | sort | |
# with cleanup in perl | |
bq show --schema --format=prettyjson bigquery-public-data:samples.natality > /tmp/natality.json | |
< /tmp/natality.json jq '[sort_by(.name) | ..| .name? + ": " + .description? ]' \ | |
| perl -pE "s/\\\n/, /g; s/'/\\\\'/g; " | perl -pE 's/\\"//g' | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment