Skip to content

Instantly share code, notes, and snippets.

@jcrist
Created August 18, 2022 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcrist/f808ceba485ab1ca7580299297b59281 to your computer and use it in GitHub Desktop.
Save jcrist/f808ceba485ab1ca7580299297b59281 to your computer and use it in GitHub Desktop.
Snippets used in a twitter thread about ibis-datasette
In [1]: import ibis
In [2]: ibis.options.interactive = True # enable interactive mode
In [3]: con = ibis.sqlite.connect("legislators.db") # connect to a database
In [4]: legislators = con.tables["legislators"] # access tables
In [5]: legislators.groupby("bio_gender").count() # query using a dataframe-like API
Out[5]:
bio_gender count
0 F 399
1 M 12195
In [6]: terms = con.tables["legislator_terms"]
In [7]: first_female_rep = (
...: legislators
...: .join(terms, legislators.id == terms.legislator_id)
...: .filter(lambda _: _.bio_gender == "F")
...: .select("name", "state", "start")
...: .sort_by("start")
...: .limit(1)
...: )
In [8]: first_female_rep
Out[8]:
name state start
0 Jeannette Rankin MT 1917-04-02
In [9]: percent_female_by_decade = (
...: legislators
...: .join(terms, legislators.id == terms.legislator_id)
...: .select("bio_gender", "start")
...: .mutate(
...: decade=lambda _: (ibis.date(_.start).year() / 10).cast("int32") * 10
...: )
...: .group_by("decade")
...: .aggregate(
...: n_female=lambda _: (_.bio_gender == "F").sum(),
...: n_total=lambda _: _.count()
...: )
...: .mutate(
...: percent_female=lambda _: 100 * (_.n_female / _.n_total)
...: )
...: .filter(lambda _: _.percent_female > 0)
...: .select("decade", "percent_female")
...: )
In [10]: percent_female_by_decade
Out[10]:
decade percent_female
0 1910 0.040584
1 1920 0.883179
2 1930 1.608363
3 1940 1.845166
4 1950 3.030303
5 1960 2.718287
6 1970 3.592073
7 1980 4.977188
8 1990 10.830922
9 2000 15.865783
10 2010 20.196641
11 2020 27.789047
In [11]: ibis.show_sql(percent_female_by_decade)
SELECT
t0.decade,
t0.percent_female
FROM (
SELECT
t1.decade AS decade,
t1.n_female AS n_female,
t1.n_total AS n_total,
t1.percent_female AS percent_female
FROM (
SELECT
t2.decade AS decade,
t2.n_female AS n_female,
t2.n_total AS n_total,
(
t2.n_female / CAST(t2.n_total AS REAL)
) * 100 AS percent_female
FROM (
SELECT
t3.decade AS decade,
SUM(CAST(t3.bio_gender = 'F' AS INTEGER)) AS n_female,
COUNT('*') AS n_total
FROM (
SELECT
t4.bio_gender AS bio_gender,
t4.start AS start,
CAST(CAST(STRFTIME('%Y', DATE(t4.start)) AS INTEGER) / CAST(10 AS REAL) AS INTEGER) * 10 AS decade
FROM (
SELECT
bio_gender,
start
FROM main.legislators AS t5
JOIN main.legislator_terms AS t6
ON t5.id = t6.legislator_id
) AS t4
) AS t3
GROUP BY
t3.decade
) AS t2
) AS t1
WHERE
t1.percent_female > 0
) AS t0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment