Skip to content

Instantly share code, notes, and snippets.

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from pyspark.sql.functions import *
from awsglue.dynamicframe import DynamicFrame
@vvgsrk
vvgsrk / glue-spark-shell-scala-commands.scala
Last active August 23, 2019 02:33
AWS Glue spark-shell scala commands
// Invoke Spark Shell
$ glue-spark-shell -v --properties-file /home/glue/glue_spark_shell.properties --packages com.databricks:spark-avro_2.11:4.0.0
// Import Required Classes
import org.apache.spark.SparkContext
import com.amazonaws.services.glue.GlueContext
import com.amazonaws.services.glue.DynamicFrame
import com.amazonaws.services.glue.DynamicRecord
import com.amazonaws.services.glue.MappingSpec
import com.amazonaws.services.glue.errors.CallSite
from pyspark.sql.types import *
from pyspark.sql.functions import *
#Flatten array of structs and structs
def flatten(df):
# compute Complex Fields (Lists and Structs) in Schema
complex_fields = dict([(field.name, field.dataType)
for field in df.schema.fields
if type(field.dataType) == ArrayType or type(field.dataType) == StructType])
while len(complex_fields)!=0:
@sysboss
sysboss / query_athena.py
Created May 21, 2018 15:41
SQL Query Amazon Athena using Python
#!/usr/bin/env python3
#
# Query AWS Athena using SQL
# Copyright (c) Alexey Baikov <sysboss[at]mail.ru>
#
# This snippet is a basic example of querying Athena and loading the results
# into a variable.
#
# Requirements:
# > pip3 install boto3 botocore retrying
@trentniemeyer
trentniemeyer / MixPanelExport.py
Last active March 6, 2024 14:53
MixPanel Export API in Python 3
#! /usr/bin/env python
#
# Mixpanel, Inc. -- http://mixpanel.com/
#
# Python API client library to consume mixpanel.com analytics data.
#
# Copyright 2010-2013 Mixpanel, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@mrkwjc
mrkwjc / cuspsolve.py
Last active November 28, 2022 09:49
CUDA vs. CPU sparse solver in Python
# ### Interface cuSOLVER PyCUDA
from __future__ import print_function
import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import scipy.sparse as sp
import ctypes
@frengky
frengky / nvidia-centos-update.md
Last active July 19, 2023 09:31
Update Nvidia driver on CentOS 7.x after kernel update

Update Nvidia driver on CentOS 7.x

Download the latest Nvidia driver from http://www.nvidia.com/drivers

Update the kernel to the latest version

$ yum update

Change to runlevel 3 - multi user mode for the next reboot

@StuPig
StuPig / pre-push
Created December 9, 2014 12:18
禁止直接向master和develop分支提交代码的钩子,放在.git/hooks/pre-push
#!/bin/bash
current_branch=$(git symbolic-ref HEAD | sed -e 's,.*/\(.*\),\1,')
if [ "$current_branch" = "master" ] || [ "$current_branch" = "develop" ]; then
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
echo ""
echo "禁止直接向master或develop分支提交代码!"
echo ""
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
exit 1
@clintongormley
clintongormley / load_test_data.sh
Last active January 5, 2024 07:32
Run these commands in your shell to setup the test data for Chapter 5
curl -XPUT 'http://localhost:9200/us/user/1?pretty=1' -d '
{
"email" : "john@smith.com",
"name" : "John Smith",
"username" : "@john"
}
'
curl -XPUT 'http://localhost:9200/gb/user/2?pretty=1' -d '
{
@deemstone
deemstone / line-by-line.js
Last active January 20, 2020 05:40
Read a large file line by line with Node.js; mainly created for log-file processing.
/*
* 逐行读取文件
* 分段读取文件
*/
var fs = require('fs');
//每段读取的长度
//@param inputFile{filepath}
//@param onEnd{func} 所有内容读完了
module.exports = function(inputFile, onEnd){
var sLength = 1024;