Cristian Vargas cvargas-xbrein

## humanity_globe.R
library(rayshader)
library(rayrender)

# Read the GeoTIFF (a relative path, so it is resolved against the current
# working directory). The file name suggests GPW v4 population density for
# 2020 on a 15-minute grid.
popdata <- raster::raster(
  "gpw_v4_population_density_rev11_2020_15_min.tif"
)

# Turn the raster into a plain matrix, then pass it through rayshader's
# unexported flipud() helper (presumably an up-down flip -- confirm).
population_mat <- rayshader:::flipud(
  raster_to_matrix(popdata)
)

# Logical masks: TRUE where the cell value exceeds the given threshold.
above1 <- population_mat > 1
above5 <- population_mat > 5
above10 <- population_mat > 10

## cachulo-aves.markdown

      
              4 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                cvargas-xbrein
                / cachulo-aves.markdown
            
            
              Created
              December 7, 2022 02:24
            
              
                Cachulo aves 
              
          
    Cachulo aves

A Pen by Cristian Vargas on CodePen.
License.

  
## jenks2.py
import json
from pprint import pprint as pp

def jenks_matrices_init(data, n_classes):
    #fill the matrices with data+1 arrays of n_classes 0s
    lower_class_limits = []
    variance_combinations = []
    for i in xrange(0, len(data)+1):
        temp1 = []
        temp2 = []

## amazon_athena_create_table.ddl
CREATE EXTERNAL TABLE IF NOT EXISTS default.table
(
  `id` int,
  `name` string,
  `timestamp` string,
  `is_debug` boolean
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
  'escapeChar'='\\',

## pyspark_jdbc_df_count.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                cvargas-xbrein
                / pyspark_jdbc_df_count.md
            
            
              Created
              May 10, 2022 13:34
                — forked from tilakpatidar/pyspark_jdbc_df_count.md
            
              
                Gist to perform count() on jdbc sources without re-reading the df
              
          
    Postgres snippet

create database test_db;

create table t_random as select s, md5(random()::text) from generate_Series(1,5000) s;
Pyspark snippet

In [1]: df=spark.read.jdbc(url="jdbc:postgresql://localhost:5432/test_db", table="t_random", properties={"driver": "org.postgresql.Driver"}).repartition(10)


## install-units.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                cvargas-xbrein
                / install-units.md
            
            
              Created
              December 17, 2021 12:45
                — forked from slowkow/install-units.md
            
              
                Install the 'units' R package on Partners
              
          
    Summary

I had a difficult time installing the units R package on the Partners ERIS servers.
I hope this post helps you to figure out how to work around the errors.
Instructions


## Install_FFmepg_OpenCV_EMR.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                cvargas-xbrein
                / Install_FFmepg_OpenCV_EMR.md
            
            
              Created
              September 15, 2021 16:09
                — forked from phonchi/Install_FFmepg_OpenCV_EMR.md
            
              
                Install FFmpeg and OpenCV on AWS EMR
              
          
    sudo yum -y update
sudo yum -y groupinstall 'Development Tools'
sudo yum install -y cmake git pkgconfig
sudo yum install -y libpng-devel libjpeg-turbo-devel jasper-devel openexr-devel libtiff-devel libwebp-devel
sudo yum install -y libdc1394-devel libv4l-devel gstreamer-plugins-base-devel
sudo yum install -y gtk2-devel
sudo yum install -y tbb-devel eigen3-devel
wget https://bootstrap.pypa.io/get-pip.py
sudo python get-pip.py

  
## gluejob.py
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from pyspark.sql.functions import *
from awsglue.dynamicframe import DynamicFrame


## airflow-python3.sh
# this script has been tested and works on a freshly installed Ubuntu 16.04 and 16.10
# it assumes that you are running airflow in a private network and do not need to worry about outside access
# if that's not the case, the lines for PostgreSQL and Redis in this script need to be updated accordingly
# run as root
sudo su

# initial system updates and installs
apt-get update && apt-get upgrade -y && apt-get autoremove && apt-get autoclean

apt-get -y install build-essential binutils gcc make git htop nethogs tmux

## setup_connections.py
#!/usr/bin/env python3

from __future__ import print_function

import os
import sys

from airflow import settings
from airflow.models import Connection
from sqlalchemy.orm import exc
	library(rayshader)
	library(rayrender)

	# Read the GeoTIFF (a relative path, so it is resolved against the current
	# working directory). The file name suggests GPW v4 population density for
	# 2020 on a 15-minute grid.
	popdata <- raster::raster(
	  "gpw_v4_population_density_rev11_2020_15_min.tif"
	)

	# Turn the raster into a plain matrix, then pass it through rayshader's
	# unexported flipud() helper (presumably an up-down flip -- confirm).
	population_mat <- rayshader:::flipud(
	  raster_to_matrix(popdata)
	)

	# Logical masks: TRUE where the cell value exceeds the given threshold.
	above1 <- population_mat > 1
	above5 <- population_mat > 5
	above10 <- population_mat > 10
	import json
	from pprint import pprint as pp

	def jenks_matrices_init(data, n_classes):
	#fill the matrices with data+1 arrays of n_classes 0s
	lower_class_limits = []
	variance_combinations = []
	for i in xrange(0, len(data)+1):
	temp1 = []
	temp2 = []
	CREATE EXTERNAL TABLE IF NOT EXISTS default.table
	(
	`id` int,
	`name` string,
	`timestamp` string,
	`is_debug` boolean
	)
	ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
	WITH SERDEPROPERTIES (
	'escapeChar'='\\',
	import sys
	from awsglue.transforms import *
	from awsglue.utils import getResolvedOptions
	from pyspark.context import SparkContext
	from awsglue.context import GlueContext
	from awsglue.job import Job
	from pyspark.sql.functions import *
	from awsglue.dynamicframe import DynamicFrame
	# this script has been tested and works on a freshly installed Ubuntu 16.04 and 16.10
	# it assumes that you are running airflow in a private network and do not need to worry about outside access
	# if that's not the case, the lines for PostgreSQL and Redis in this script need to be updated accordingly
	# run as root
	sudo su

	# initial system updates and installs
	apt-get update && apt-get upgrade -y && apt-get autoremove && apt-get autoclean

	apt-get -y install build-essential binutils gcc make git htop nethogs tmux
	#!/usr/bin/env python3

	from __future__ import print_function

	import os
	import sys

	from airflow import settings
	from airflow.models import Connection
	from sqlalchemy.orm import exc