Skip to content

Instantly share code, notes, and snippets.

@Stiivi
Created February 22, 2014 18:03
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Stiivi/9159092 to your computer and use it in GitHub Desktop.
Save Stiivi/9159092 to your computer and use it in GitHub Desktop.
# Demo:
#
# Aggregate population per independence type for every year
# Sources: Population and Country Codes datasets
#
from bubbles import Pipeline
from bubbles import get_logger
logger = get_logger()
logger.setLevel("DEBUG")
# List of stores with datasets. In this example we are using the "datapackage"
# store
stores = {
"source": {"type": "datapackages", "url": "."},
"sql": {"type": "sql", "url": "sqlite:///data.sqlite"}
}
p = Pipeline(stores=stores)
# Set the source dataset
p.source("source", "population")
# === COPY DATA INTO SQL TABLE ===
p.create("sql", "population", replace=True)
# Prepare another dataset and keep just relevant fields
cc = p.fork(empty=True)
cc.source("source", "country-codes")
# === COPY DATA INTO SQL TABLE ===
cc.create("sql", "country-codes", replace=True)
cc.keep_fields(["ISO3166-1-Alpha-3", "is_independent"])
# Join them – left inner join
p.join_details(cc, "Country Code", "ISO3166-1-Alpha-3")
# Aggregate Value by status and year
p.aggregate(["is_independent", "Year"],
[["Value", "sum"]],
include_count=True)
# Sort for nicer output...
p.sort(["is_independent", "Year"])
# Print pretty table.
p.pretty_print()
p.run()
2014-02-22 19:02:06,965 DEBUG step 0: evaluate soure population in source
2014-02-22 19:02:06,966 DEBUG step 1: evaluate soure country-codes in source
2014-02-22 19:02:06,967 DEBUG step 2: evaluate create population in sql
2014-02-22 19:02:06,976 DEBUG append_from: appending rows into population
2014-02-22 19:02:06,985 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,004 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,020 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,037 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,054 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,076 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,094 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,112 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,129 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,146 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,164 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,181 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,192 DEBUG EXECUTE SQL: INSERT INTO population ("Country Name", "Country Code", "Year", "Value") VALUES (?, ?, ?, ?)
2014-02-22 19:02:07,194 DEBUG step 3: evaluate create country-codes in sql
2014-02-22 19:02:07,202 DEBUG append_from: appending rows into country-codes
2014-02-22 19:02:07,209 DEBUG EXECUTE SQL: INSERT INTO "country-codes" (name, name_fr, "ISO3166-1-Alpha-2", "ISO3166-1-Alpha-3", "ISO3166-1-numeric", "ITU", "MARC", "WMO", "DS", "Dial", "FIFA", "FIPS", "GAUL", "IOC", currency_alphabetic_code, currency_country_name, currency_minor_unit, currency_name, currency_numeric_code, is_independent) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2014-02-22 19:02:07,218 DEBUG step 4: evaluate operation keep_fields
2014-02-22 19:02:07,218 INFO calling keep_fields(*)
2014-02-22 19:02:07,219 WARNING operation keep_fields is experimental
2014-02-22 19:02:07,219 INFO calling field_filter(sql)
2014-02-22 19:02:07,220 DEBUG called field_filter(sql)
2014-02-22 19:02:07,220 DEBUG called keep_fields(*)
2014-02-22 19:02:07,220 DEBUG step 5: evaluate operation join_details
2014-02-22 19:02:07,220 INFO calling join_details(sql, sql)
2014-02-22 19:02:07,221 DEBUG called join_details(sql, sql)
2014-02-22 19:02:07,221 DEBUG step 6: evaluate operation aggregate
2014-02-22 19:02:07,221 INFO calling aggregate(sql)
2014-02-22 19:02:07,222 DEBUG called aggregate(sql)
2014-02-22 19:02:07,222 DEBUG step 7: evaluate operation sort
2014-02-22 19:02:07,222 INFO calling sort(sql)
2014-02-22 19:02:07,222 DEBUG called sort(sql)
2014-02-22 19:02:07,222 DEBUG step 8: evaluate operation pretty_print
2014-02-22 19:02:07,222 INFO calling pretty_print(records)
2014-02-22 19:02:07,223 DEBUG EXECUTE SQL: SELECT is_independent, "Year", "Value_sum", record_count
FROM (SELECT is_independent, "Year", sum("Value") AS "Value_sum", count(?) AS record_count
FROM (SELECT __m."Country Name" AS "Country Name", __m."Country Code" AS "Country Code", __m."Year" AS "Year", __m."Value" AS "Value", __d.is_independent AS is_independent
FROM population AS __m JOIN (SELECT __ff."ISO3166-1-Alpha-3" AS "ISO3166-1-Alpha-3", __ff.is_independent AS is_independent
FROM "country-codes" AS __ff) AS __d ON __m."Country Code" = __d."ISO3166-1-Alpha-3") GROUP BY is_independent, "Year") ORDER BY is_independent ASC, "Year" ASC
2014-02-22 18:40:53,991 DEBUG step 0: evaluate soure population in source
2014-02-22 18:40:53,992 DEBUG step 1: evaluate soure country-codes in source
2014-02-22 18:40:53,992 DEBUG step 2: evaluate operation keep_fields
2014-02-22 18:40:53,992 INFO calling keep_fields(*)
2014-02-22 18:40:53,992 WARNING operation keep_fields is experimental
2014-02-22 18:40:53,993 INFO calling field_filter(rows)
2014-02-22 18:40:53,993 DEBUG called field_filter(rows)
2014-02-22 18:40:53,993 DEBUG called keep_fields(*)
2014-02-22 18:40:53,993 DEBUG step 3: evaluate operation join_details
2014-02-22 18:40:53,993 INFO calling join_details(rows, rows)
2014-02-22 18:40:53,993 DEBUG called join_details(rows, rows)
2014-02-22 18:40:53,993 DEBUG step 4: evaluate operation aggregate
2014-02-22 18:40:53,994 INFO calling aggregate(rows)
2014-02-22 18:40:54,140 DEBUG called aggregate(rows)
2014-02-22 18:40:54,140 DEBUG step 5: evaluate operation sort
2014-02-22 18:40:54,140 INFO calling sort(rows)
2014-02-22 18:40:54,142 DEBUG called sort(rows)
2014-02-22 18:40:54,142 DEBUG step 6: evaluate operation pretty_print
2014-02-22 18:40:54,142 INFO calling pretty_print(records)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment