Create a gist now

Instantly share code, notes, and snippets.

# Demo:
#
# Aggregate population per independence type for every year
# Sources: Population and Country Codes datasets
#
from bubbles import Pipeline
# Stores that hold the datasets. This example uses a single store of type
# "datapackages" whose URL is the current directory.
stores = {
    "source": {"type": "datapackages", "url": "."},
}
p = Pipeline(stores=stores)

# Set the source dataset
p.source("source", "population")

# Prepare another dataset and keep just the relevant fields
cc = p.fork(empty=True)
cc.source("source", "country-codes")
cc.keep_fields(["ISO3166-1-Alpha-3", "is_independent"])

# Join the country-codes pipeline (`cc`) as details onto the population
# pipeline: "Country Code" on the population side is matched against
# "ISO3166-1-Alpha-3" on the `cc` side.
# NOTE: the detail pipeline must be `cc`; no `pop` name exists in this script.
p.join_details(cc, "Country Code", "ISO3166-1-Alpha-3")

# Aggregate Value by independence status and year
p.aggregate(
    ["is_independent", "Year"],
    [["Value", "sum"]],
    include_count=True,
)

# Sort for nicer output...
p.sort(["is_independent", "Year"])

# Print pretty table.
p.pretty_print()

# Nothing above is executed until the pipeline is run.
p.run()
+----------------------+----+------------------+------------+
|is_independent |Year|Value_sum |record_count|
+----------------------+----+------------------+------------+
|Commonwealth of US |1960|2368070.0 | 2|
|Commonwealth of US |1961|2410061.6 | 2|
|Commonwealth of US |1962|2460862.6 | 2|
|Commonwealth of US |1963|2515238.6 | 2|
|Commonwealth of US |1964|2564966.6 | 2|
|Commonwealth of US |1965|2605157.0 | 2|
|Commonwealth of US |1966|2636484.2 | 2|
...
|Part of DK |2007|105127.0 | 2|
|Part of DK |2008|104925.0 | 2|
|Part of DK |2009|104948.0 | 2|
|Part of DK |2010|105242.0 | 2|
|Part of FR |1960|28334.0 | 2|
|Part of FR |1961|29715.0 | 2|
|Part of FR |1962|31108.0 | 2|
...
|Territory of GB |2005|175444.0 | 4|
|Territory of GB |2006|179339.0 | 4|
|Territory of GB |2007|182379.0 | 4|
|Territory of GB |2008|184742.0 | 4|
|Territory of GB |2009|186691.0 | 4|
|Territory of GB |2010|188428.0 | 4|
|Territory of US |1960|119477.0 | 3|
|Territory of US |1961|123435.0 | 3|
|Territory of US |1962|127865.0 | 3|
|Territory of US |1963|132772.0 | 3|
|Territory of US |1964|138016.0 | 3|
|Territory of US |1965|143391.0 | 3|
|Territory of US |1966|149372.0 | 3|
...
|Yes |2006|6539418656.998454 | 191|
|Yes |2007|6616972256.680267 | 191|
|Yes |2008|6695249310.393451 | 191|
|Yes |2009|6773605967.338223 | 191|
|Yes |2010|6851909862.0 | 191|
+----------------------+----+------------------+------------+
@balsagoth

On line 25, shouldn't it be `cc` instead of `pop`?

p.join_details(cc, "Country Code", "ISO3166-1-Alpha-3")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment