Skip to content

Instantly share code, notes, and snippets.

Avatar

Anthony Cros anthony-cros

View GitHub Profile
View DataFramesAndTablesInScalaArticle.scala
import gallia._ // see https://github.com/galliaproject/gallia-core/blob/init/README.md#dependencies
// ===========================================================================
object DataFramesAndTablesInScalaArticle {
// https://darrenjw.wordpress.com/2015/08/21/data-frames-and-tables-in-scala/
// ---------------------------------------------------------------------------
def main(args: Array[String]): Unit = {
"/data/misc/cars93.csv".stream()
.filterBy("EngineSize").matches(_ <= 4.0)
View ReshapingDataWithPivotInSparkArticle.scala
import gallia._
import aptus._ // for divideBy and significantFigures utilities
// ===========================================================================
object ReshapingDataWithPivotInSparkArticle {
// reproduces first query in https://databricks.com/blog/2016/02/09/reshaping-data-with-pivot-in-apache-spark.html
// ---------------------------------------------------------------------------
// t210224102310 - not actually implemented yet, but will also use
// https://github.com/galliaproject/gallia-core/blob/init/src/main/scala/gallia/atoms/AtomsIX.scala#L110 for local runs
View StartUsingSparkWithScala.scala
import gallia._ // see https://github.com/galliaproject/gallia-core/blob/init/README.md#dependencies
// ===========================================================================
object StartUsingSparkWithScala { // see https://towardsdatascience.com/stop-using-pandas-and-start-using-spark-with-scala-f7364077c2e0 by Chloe Connor
// test meta
case class FootballTeam(
name : String,
league : String,
matches_played : Int,
@anthony-cros
anthony-cros / WordCount.scala
Created Feb 23, 2021
Reproduces go-to Word Count example
View WordCount.scala
import gallia._
// ===========================================================================
object WordCount { // see http://spark.apache.org/examples.html and https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html
def main(args: Array[String]): Unit = {
// INPUT: "Hello World Bye World\nHello Hadoop Goodbye Hadoop"
// ---------------------------------------------------------------------------
View VogelzangMediumArticle.scala
object MediumArticle extends App { // reproducing example from https://medium.com/@thijser/doing-cool-data-science-in-java-how-3-dataframe-libraries-stack-up-5e6ccb7b437#
import gallia._ // see https://github.com/galliaproject/gallia-core/blob/init/README.md#dependencies
// ---------------------------------------------------------------------------
/*
for reference, pandas way provided in the article:
data = pd.read_csv('urb_cpop1_1_Data.csv')
filtered = data.drop(data[data.Value == ":"].index)
filtered['key'] = filtered['CITIES'] + ':' + filtered['INDIC_UR']
View MyApp.scala
package minimal
import org.scalajs.dom
import scala.scalajs.js
object App {
def main(args: Array[String]): Unit = {
// prints to console as expected
View Bindings.scala
package minimal
import scala.scalajs.js
import scala.scalajs.js.annotation.JSImport
object Bindings {
// ===========================================================================
// ReactDOM
View test.jsx
import React, { Component } from 'react';
import logo from './logo.svg';
import './App.css';
class App extends Component {
render() {
return (
<div className="App">
<header className="App-header">
<img src={logo} className="App-logo" alt="logo" />
View disease_express_ql_examples
{ "stage": "4" }
{ "$eq": { "mycn_status":"amplified" }}
{ "$not": { "stage": 4 }}
{ "$not": { "$lt": { "stage": 4 }}}
{ "$not": { "$not": { "stage": "4" }}}
{ "$not": { "$not": { "$lt": { "stage": 4 }}}}
{ "risk": ["high","low"] }
{ "$nin": { "risk": ["high","low"] }}
{ "$not": { "$in": { "risk": ["high","low"] }}}
{ "$or": [{ "$eq": { "mycn_status":"amplified" }},{ "stage": "4" } ] }
View data.test
# notes:
# - use 4 spaces for display tab
# - repeated tabs get trimmed to one
# - anything beyond ' #' is trimmed (including marker), so are empty lines (post line-trim)
# - \N stands for no value (e.g. NULL in SQL)
# - PATID, ENCOUNTERID and other IDs are inferred from the _id field, including redundant fields
===========================================================================
DEMOGRAPHIC