Skip to content

Instantly share code, notes, and snippets.

@luthfianto
Last active November 21, 2018 16:45
Show Gist options
  • Save luthfianto/28bf95b8ddc80010cf7852ef4fc85927 to your computer and use it in GitHub Desktop.
Save luthfianto/28bf95b8ddc80010cf7852ef4fc85927 to your computer and use it in GitHub Desktop.
const sqlite3 = require("sqlite3")
class SqliteDF {
table: any;
created: boolean;
con: any;
max_index: number;
_group_cols: any;
verbose: boolean;
_cache: any;
constructor(con = null, table = null, group_cols = null, cache = null, verbose = false) {
`
Keyword arguments:
con -- A SQLite connection object.
table -- the name of the table associated with this dataframe
group_cols -- for grouping operations, the set of columns we are grouping by
cache -- shared across instances in order to memoize select statements
verbose - print SQL statements.
`
if (!table) {
this.table = this._make_table_name()
this.created = false
} else {
this.table = table
this.created = true
}
if (!con) {
this.con = sqlite3.connect(':memory:')
this.max_index = 0
} else {
this.con = con
this.max_index = this.con.cursor().execute('select max(ROWID) from {}'.format(this.table)).fetchone()[0]
}
this._group_cols = group_cols
this._cache = cache|| {}
this.verbose = verbose
}
private toString() {
return 'SqliteDF({} x {})'.format(this.max_index, len(this.keys()))
}
private __setitem__(this, key, values) {
`Primary way to get data in. Sets one column, which must be aligned with existing columns.
Arguments:
key -- the name of the column to create
value -- an iterable of values the column takes, must be ordered
`
if (this.created) {
this.con.execute('alter table {} add column {} int'.format(this.table, key))
} else {
this.con.cursor().execute('create table {} ({} int)'.format(this.table, key))
this.created = true
}
if (len(values) > this.max_index) {
to_insert = [(i,) for i in range(this.max_index, len(values))]
this.con.cursor().executemany('insert into {} (ROWID) values (?)'.format(this.table, key), to_insert)
this.max_index = len(values)
}
this.con.cursor().executemany('update {} set {}=? where ROWID=?'.format(this.table, key),
[(v, i) for i, v in enumerate(values)])
}
private keys() {
`Return all columns in the data frame.`
const tables = this.con.cursor().execute("pragma table_info({})".format(this.table)).fetchall()
return [t[1] for t in tables]
}
private __getitem__(this, key) {
`Get an individual column from the data frame.`
const rows = this.con.cursor().execute('select {} from {} order by ROWID'.format(key, this.table)).fetchall()
return [row[0] for row in rows]
}
private print(...args: any[]) {
`Render the data frame to a string and print.`
const rows = this.con.cursor().execute('select * from {}'.format(this.table)).fetchall()
print(tabulate(rows, headers = this.keys()))
}
private tabulate(...args: any[]) {
return args;
}
private _make_table_name() {
`Generate a random name for a create table statement.`
return ''.join(random.choices(string.ascii_letters[: 26], k = 10))
}
private _create_table(this, select, memoize=true){
`Create a new table and return a data frame linked to it.
Arguments:
select -- a query that selects data from the current set of available tables.
memoize -- whether to cache the select query or re-run it.
`
if (memoize && select in this._cache){
const table = this._cache[select]
return SqliteDF(this.con, table, cache=this._cache)}
const table = this._make_table_name()
const q = 'create table {} as {}'.format(table, select)
if (this.verbose)
print(select)
this.con.cursor().execute(q)
if (memoize)
this._cache[select] = table
return new SqliteDF(this.con, table, this._cache)
}
public head(n: number=5){
const q = 'select * from {} order by ROWID limit {}'.format(this.table, n)
return this._create_table(q)
}
private tail(n: number=5){
const q = 'select * from {} order by ROWID desc limit {}'.format(this.table, n)
return this._create_table(q)
}
// private group_by(this, *columns):
// `Returns the same data frame with group columns added.`
// return SqliteDF(this.con, this.table, group_cols=columns, cache=this._cache)
// private ungroup(){}
// `Returns the same data frame with group columns removed.`
// return SqliteDF(this.con, this.table, group_cols=null, cache=this._cache)
// private count(){}
// if not this._group_cols:
// q = 'select count(1) as n from {}'.format(this.table)
// else:
// q = 'select {}, count(1) as n from {} group by {}'.format(
// ', '.join(this._group_cols),
// this.table,
// ', '.join(this._group_cols),
// )
// return this._create_table(q)
// private summarise(this, **exprs):
// summaries_list = []
// for key, value in exprs.items():
// summaries_list.append('{} as {}'.format(value, key))
// summaries = ', '.join(summaries_list)
// if not this._group_cols:
// q = 'select {} from {}'.format(summaries, this.table)
// else:
// q = 'select {}, {} from {} group by {}'.format(
// ', '.join(this._group_cols),
// summaries,
// this.table,
// ', '.join(this._group_cols),
// )
// return this._create_table(q)
// private select(this, *columns):
// q = 'select {} from {}'.format(
// ', '.join(columns),
// this.table,
// )
// return this._create_table(q)
// private mutate(this, **exprs):
// mutations_list = []
// for key, value in exprs.items():
// mutations_list.append('{} as {}'.format(value, key))
// keys = this.keys()
// mutations = ', '.join(mutations_list)
// q = 'select {}, {} from {}'.format(
// ', '.join(keys),
// mutations,
// this.table,
// )
// return this._create_table(q)
// private inner_join(this, other_df):
// left_keys = this.keys()
// right_keys = other_df.keys()
// overlap = [lk for lk in left_keys if lk in right_keys]
// on_statement = ' and '.join('a.{0} = b.{0}'.format(col) for col in overlap)
// cols_to_select = (
// ['a.{}'.format(o) for o in overlap]
// + ['a.{}'.format(lk) for lk in left_keys if lk not in overlap]
// + ['b.{}'.format(rk) for rk in right_keys if (rk not in overlap) and (rk not in left_keys)]
// )
// q = 'select {} from {} a inner join {} b on {}'.format(
// ', '.join(cols_to_select),
// this.table,
// other_df.table,
// on_statement,
// )
// return this._create_table(q)
// private bind_rows(this, other_df):
// left_keys = this.keys()
// right_keys = other_df.keys()
// overlap = [lk for lk in left_keys if lk in right_keys]
// cols = ', '.join(overlap)
// q = 'select {} FROM {} UNION ALL SELECT {} FROM {}'.format(
// cols,
// this.table,
// cols,
// other_df.table,
// )
// return this._create_table(q)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment