Skip to content

Instantly share code, notes, and snippets.

View voycey's full-sized avatar

Dan Voyce voycey

  • Melbourne
View GitHub Profile
@voycey
voycey / routes
Created April 29, 2014 11:30 — forked from egalles79/routes
// Subdomain = the part of the Host header that precedes its first dot.
$subdomain = substr(
    env("HTTP_HOST"),
    0,
    strpos(env("HTTP_HOST"), ".")
);

// Start with an empty 'captacion' setting; it is overwritten below when a
// qualifying subdomain is present.
Configure::write('captacion', '');

// Any non-empty subdomain other than "m" is stored under 'captacion' and gets
// the '*' route pointing at private::promote.
if ( $subdomain != "m" && strlen($subdomain) > 0 ) {
    Configure::write('captacion', $subdomain);
    Router::connect(
        '*',
        array('controller' => 'private', 'action' => 'promote')
    );
    // Example of a further route, left disabled:
    // Router::connect('/foo', array('controller'=>'mobiles','action'=>'foo'));
}
%%time
# Notebook cell 1 (timed by the %%time cell magic): read every gzip-compressed
# CSV matching the glob into a single dask_cudf dataframe.
import dask_cudf as dc
ddf = dc.read_csv('/data/Data Files/Vegas/datafiles/csv/*.csv.gz', compression='gzip')
%%time
# Notebook cell 2: re-split the dataframe into 3000 partitions before writing.
ddf = ddf.repartition(npartitions=3000)
%%time
# Notebook cell 3: write the repartitioned dataframe out as ORC under the
# target directory.
ddf.to_orc('/data/Data Files/Vegas/datafiles/orc/')
@voycey
voycey / join.sql
Last active December 31, 2019 05:34
SQL
-- For each AUDI or BMW dealership, list the ids of the movement points whose
-- (lng, lat) position lies inside the dealership geometry d.wkt.
-- The outer join keeps dealerships that contain no movement point; m.id is
-- NULL on those rows.
SELECT
    m.id,
    d.name,
    d.brand,
    d.wkt
FROM dealerships d
LEFT OUTER JOIN movement m
    ON ST_CONTAINS(d.wkt, ST_POINT(m.lng, m.lat))
WHERE d.brand IN ('AUDI', 'BMW')
@voycey
voycey / pip.c
Created December 31, 2019 05:41
Point in Polygon CUDA C code
/*
 * Point-in-polygon test by edge-crossing parity.
 *
 * nvert        number of polygon vertices
 * vertx, verty arrays of nvert vertex x / y coordinates
 * testx, testy coordinates of the point being tested
 *
 * Walks every edge (prev -> cur), starting with the closing edge from the
 * last vertex to the first. Each time the point's y lies between the edge's
 * endpoints and the point is to the left of the edge at that y, the parity
 * flag flips. Returns 1 for an odd number of flips, 0 otherwise (including
 * when nvert is 0, since the loop body never runs).
 */
int pnpoly(int nvert, float *vertx, float *verty, float testx, float testy)
{
    int inside = 0;
    int cur = 0;
    int prev = nvert - 1;

    while (cur < nvert) {
        /* Only edges whose endpoints sit on opposite sides of testy count;
           this also guarantees the divisor below is non-zero. */
        if ((verty[cur] > testy) != (verty[prev] > testy)) {
            if (testx < (vertx[prev]-vertx[cur]) * (testy-verty[cur]) / (verty[prev]-verty[cur]) + vertx[cur])
                inside = !inside;
        }
        prev = cur;
        cur++;
    }
    return inside;
}
@voycey
voycey / geospatialtest.sql
Created January 2, 2020 04:16
example Geospatial query
-- Count the distinct ids of datatable rows whose (lon, lat) point lies within
-- distance 0.1 of at least one polygon categorised as 'beauty' or 'sports'.
-- NOTE(review): the unit of 0.1 depends on the spatial reference of p.geom
-- (degrees for plain lon/lat geometries) - confirm.
-- NOTE(review): id is unqualified; qualify it (e.g. d.id) if polygons also
-- has an id column.
--
-- INNER JOIN is intentional: the WHERE filter on p.place_cat rejects rows
-- with no matching polygon (NULL place_cat), so an outer join would return
-- exactly the same rows while hiding that fact.
SELECT COUNT(DISTINCT id)
FROM datatable d
INNER JOIN polygons p
    ON ST_DWITHIN(ST_MAKEPOINT(d.lon, d.lat), p.geom, 0.1)
WHERE p.place_cat IN ('beauty', 'sports')
@voycey
voycey / nem12csv_to_athena.py
Last active July 19, 2021 02:06
NEM12 Transformation
mapped_readings = ApplyMapping.apply(frame=datasource, mappings=[
("meter_id", "string", "meter_id", "string"),
("period_start", "string", "period_start", "string"),
("period_end", "string", "period_end", "string"),
("e1", "double", "export_interval.e1", "double"),
("e2", "double", "export_interval.e2", "double"),
("e3", "double", "export_interval.e3", "double"), #NB: this can technically have many more circuits
("quality_method", "string", "quality_method", "string"),
("event", "string", "event", "string"),
("b1", "double", "import_interval.b1", "double"),
@voycey
voycey / convenience_fields.py
Created July 19, 2021 02:07
Convenience fields
mapped_readings_df = mapped_readings_df \
.withColumn("week_of_year", F.weekofyear(reading_time)) \
.withColumn("date_str", F.regexp_replace(F.col("period_end").substr(1,10), "-", "")) \
.withColumn("day_of_month", F.dayofmonth(reading_time)) \
.withColumn("month", F.month(reading_time)) \
.withColumn("year", F.year(reading_time)) \
.withColumn("hour", F.hour(reading_time)) \
.withColumn("minute", F.minute(reading_time)) \
.withColumn("reading_date_time", reading_time) \
.withColumn("ingestion_date", F.current_timestamp()) \
@voycey
voycey / write_to_quest.py
Created July 19, 2021 02:14
Load into QuestDB
def write_to_quest(df_row):
HOST = args['questdb_host']
PORT = int(args['questdb_port'])
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
sock.connect((HOST, PORT))
sock.sendall(_row_to_line_protocol(df_row).encode())
except socket.error as e: