-
-
Save vivshri/f5ce4bddecc5262cf289be219cda2b38 to your computer and use it in GitHub Desktop.
amt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bootstrap a local PySpark session via findspark.
import findspark
findspark.init()

import pyspark
import random
from pyspark.sql import SparkSession

# Reuse an existing SparkContext if one is already running.
sc = pyspark.SparkContext.getOrCreate()
print(sc.version)  # was a Python-2 print statement; fixed for Python 3

spark = SparkSession.builder.appName("vivek").getOrCreate()

# Shell equivalent for an interactive session:
#   pyspark --master local
# Install Chromium shared-library dependencies for Puppeteer on bitnami/ubuntu.
apt-get update && apt-get install -yq --no-install-recommends \
    libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 \
    libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 \
    libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 \
    libx11-6 libx11-xcb1 libxcb1 libxcursor1 libxdamage1 libxext6 \
    libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 libnss3
const puppeteer = require('puppeteer');

/**
 * Launch headless Chromium, screenshot github.com, then shut down.
 * executablePath pins a locally downloaded Chromium build — TODO confirm
 * revision linux-555668 matches the installed puppeteer version.
 */
async function run() {
  const browser = await puppeteer.launch({
    headless: true,
    executablePath: "/root/node_modules/puppeteer/.local-chromium/linux-555668/chrome-linux/chrome"
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://github.com');
    await page.screenshot({ path: 'screenshots/github.png' });
  } finally {
    // browser.close() returns a promise; the original never awaited it,
    // so the process could exit before Chromium shut down cleanly.
    await browser.close();
  }
}

// Surface failures instead of leaving an unhandled promise rejection.
run().catch((err) => { console.error(err); process.exit(1); });
// Read one Oracle table over JDBC into a Spark DataFrame.
// NOTE(review): the thin-driver URL format is "jdbc:oracle:thin:@//host:port/service";
// the original "jdbc:oracle:thin://@XXX" misplaces the '@' — confirm against the real DSN.
val table = sparkSession.read
  .format("jdbc")
  .option("url", "jdbc:oracle:thin:@//XXX")            // host:port/service placeholder
  .option("dbtable", "tcga.%s".format(tableName))
  .option("user", "XXX")                               // credentials redacted
  .option("password", "XXX")
  .option("driver", "oracle.jdbc.driver.OracleDriver")
  .option("fetchsize", "50000")                        // rows fetched per round trip
  .option("numPartitions", "200")                      // parallel read partitions
  .load()
import pandas as pd

# Convert the first sheet of an Excel workbook to Parquet, coercing
# column 'A' to int32 and 'B' to generic object dtype first so the
# Parquet schema is stable.
data = pd.read_excel('pandas_example.xlsx', sheet_name=0)
data = data.astype({'A': 'int32', 'B': 'object'})
data.to_parquet('example.parquet')
def unionAll(dfs):
    """Union a sequence of DataFrames, aligning every frame's columns
    to the column order of the first one.

    Parameters:
        dfs: non-empty iterable of DataFrames that share the same column
            names (order may differ; each is re-selected to match the first).
    Returns:
        A single DataFrame containing all rows.
    Raises:
        TypeError: if ``dfs`` is empty (``reduce`` with no initializer).
    """
    import functools  # local import: functools is not imported at file level
    return functools.reduce(
        lambda acc, df: acc.union(df.select(acc.columns)),
        dfs,
    )
# Round-trip a DataFrame schema through JSON.
# Save schema from the original DataFrame into json:
schema_json = df.schema.json()

# Restore schema from json:
import json
from pyspark.sql.types import StructType  # original relied on an unseen import

new_schema = StructType.fromJson(json.loads(schema_json))
# Persist a JSON datalake's schema once so later reads can skip inference.
# Read a small part of the whole datalake just to extract the schema.
part = spark.read.json("s3a://path/to/json/part")

# Store the schema's fields as a pickled binary file on S3 via a
# single-partition RDD (coalesce(1) -> one output file).
temp_rdd = sc.parallelize(part.schema)
temp_rdd.coalesce(1).saveAsPickleFile("s3a://path/to/destination_schema.pickle")

# From now on the saved schema can be reloaded and passed to
# spark.read.json(), which avoids a full inference pass over the
# entire datalake and is therefore much quicker.
schema_rdd = sc.pickleFile("s3a://path/to/destination_schema.pickle")
reading_schema = StructType(schema_rdd.collect())
your_data_set = spark.read.json("s3a://path/to/entire_data_lake", reading_schema)
# Union many parquet datasets (GWAS + molecular-trait) into one DataFrame.
import functools  # `reduce` is not a builtin in Python 3

# Load per-file GWAS parquet datasets.
gwas_dfs = []
for path in glob(gwas_pattern):
    path = os.path.abspath(path)
    gwas_dfs.append(spark.read.parquet(path))

# Load molecular-trait datasets, dropping 'num_tests' so the schemas
# line up with the GWAS frames for the union below.
mol_dfs = []
for path in glob(mol_pattern):
    path = os.path.abspath(path)
    mol_dfs.append(spark.read.parquet(path).drop('num_tests'))

# Take the union of everything; unionByName matches columns by name,
# not by position.
df = functools.reduce(
    pyspark.sql.DataFrame.unionByName,
    gwas_dfs + mol_dfs,
)
# Create a DataFrame with zero rows and zero columns — usable as a
# neutral seed for incremental joins/unions.
empty_schema = StructType([])
joined_df = spark.createDataFrame([], empty_schema)
# Read a retail CSV (header row present), letting Spark infer column types.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load("/Users/vivshri/Downloads/2010-12-02.csv")
)
from pyspark.sql.functions import expr, locate

simpleColors = ["black", "white", "red", "green", "blue"]

def color_locator(column, color_string):
    """Return a boolean Column named 'is_<color>' that is True when the
    upper-cased color name occurs anywhere in ``column``.

    locate() yields a 1-based position (0 when absent), so the cast to
    boolean is False exactly when the color is not found.
    """
    return (
        locate(color_string.upper(), column)
        .cast("boolean")
        .alias("is_" + color_string)
    )

selectedColumns = [color_locator(df.Description, c) for c in simpleColors]
selectedColumns.append(expr("*"))  # has to be a Column type
print(selectedColumns)  # was a Python-2 print statement; fixed for Python 3
Alternatively, try the AWS Glue option. | |
# Extract the hostname from a URL, defaulting to '' when absent.
# Python 3: urlparse lives in urllib.parse (the original used the
# removed Python-2 `urlparse` module).
from urllib.parse import urlparse
hostname = urlparse(url).hostname or ''
# Use forward slashes — the Windows-style backslash paths fail on Linux zip (see the "name not matched" warnings below).
zip -r deployment.zip ./index.js ../../node_modules/puppeteer-core/ ../../node_modules/chrome-aws-lambda/ ../../node_modules/lambdafs/ ../../node_modules/debug/ ../../node_modules/ms/ ../../node_modules/mime/ ../../node_modules/extract-zip/ ../../node_modules/yauzl/ ../../node_modules/pend/ ../../node_modules/mkdirp/ ../../node_modules/concat-stream/ ../../node_modules/readable-stream/ ../../node_modules/process-nextick-args/ ../../node_modules/isarray/ ../../node_modules/safe-buffer/ ../../node_modules/core-util-is/ ../../node_modules/inherits/ ../../node_modules/util-deprecate/ ../../node_modules/buffer-from/ ../../node_modules/rimraf/ ../../node_modules/proxy-from-env/ ../../node_modules/ws/ ../../node_modules/async-limiter/
Nightmare.js installation | |
.\node_modules\ms\ .\node_modules\mime\ .\node_modules\extract-zip\ .\node_modules\yauzl\ .\node_modules\pend\ .\node_modules\mkdirp\ .\node_modules\concat-stream\ .\node_modules\readable-stream\ .\node_modules\process-nextick-args\ .\node_modules\isarray\ .\node_modules\safe-buffer\ .\node_modules\core-util-is\ .\node_modules\inherits\ .\node_modules\util-deprecate\ .\node_modules\buffer-from\ .\node_modules\rimraf\ .\node_modules\proxy-from-env\ .\node_modules\ws\ .\node_modules\async-limiter\ | |
.\node_modules\ms\ .\node_modules\mime\ .\node_modules\extract-zip\ .\node_modules\yauzl\ .\node_modules\pend\ .\node_modules\mkdirp\ .\node_modules\concat-stream\ .\node_modules\readable-stream\ .\node_modules\process-nextick-args\ .\node_modules\isarray\ .\node_modules\safe-buffer\ .\node_modules\core-util-is\ .\node_modules\inherits\ .\node_modules\util-deprecate\ .\node_modules\buffer-from\ .\node_modules\rimraf\ .\node_modules\proxy-from-env\ .\node_modules\ws\ .\node_modules\async-limiter\ | |
node_modules\buffer-from\ .\node_modules\rimraf\ .\node_modules\proxy-from-env\ .\node_modules\ws\ .\node_modules\async-limiter\ | |
zip warning: name not matched: .\node_modules\mime\ | |
zip warning: name not matched: .\node_modules\proxy-from-env\ | |
zip warning: name not matched: .\node_modules\ws\ | |
zip warning: name not matched: .\node_modules\async-limiter\ | |
updating: index.js (164 bytes security) (deflated 61%) | |
zip -r new_deployment.zip .\index.js .\lib\ .\node_modules\nightmare\ .\node_modules\electron\ ..\node_modules\debug\ .\node_modules\ms\ .\node_modules\mime\ .\node_modules\extract-zip\ .\node_modules\yauzl\ .\node_modules\pend\ .\node_modules\mkdirp\ .\node_modules\concat-stream\ .\node_modules\readable-stream\ .\node_modules\process-nextick-args\ .\node_modules\isarray\ .\node_modules\safe-buffer\ .\node_modules\core-util-is\ .\node_modules\inherits\ .\node_modules\util-deprecate\ .\node_modules\buffer-from\ .\node_modules\rimraf\ .\node_modules\proxy-from-env\ .\node_modules\ws\ .\node_modules\async-limiter\ .\node_modules\sliced\ .\node_modules\jsesc\ .\node_modules\once\ .\node_modules\wrappy\ | |
aws lambda update-function-code --function-name myNightmareFunction --zip-file fileb://new_deployment.zip | |
aws lambda invoke --function-name myNightmareFunction --payload '{ "name": "Bob" }' \ | |
zip -r new_deployment.zip .\index.js .\lib\ .\node_modules\nightmare\ .\node_modules\electron\ ..\node_modules\debug\ .\node_modules\ms\ .\node_modules\mime\ .\node_modules\extract-zip\ .\node_modules\yauzl\ .\node_modules\pend\ .\node_modules\mkdirp\ .\node_modules\concat-stream\ .\node_modules\readable-stream\ .\node_modules\process-nextick-args\ .\node_modules\isarray\ .\node_modules\safe-buffer\ .\node_modules\core-util-is\ .\node_modules\inherits\ .\node_modules\util-deprecate\ .\node_modules\buffer-from\ .\node_modules\rimraf\ .\node_modules\proxy-from-env\ .\node_modules\ws\ .\node_modules\async-limiter\ .\node_modules\sliced\ .\node_modules\jsesc\ .\node_modules\once\ .\node_modules\wrappy\ .\node_modules\split2\ .\node_modules\defaults\ .\node_modules\clone\ .\node_modules\minstache\; aws lambda update-function-code | |
--function-name myNightmareFunction --zip-file fileb://new_deployment.zip ; aws lambda invoke --function-name myNightmareFunction --payload '{ \"name\": \"Bob\" }' | |
aws lambda update-function-code --function-name myNightmareFunction --zip-file fileb://new_deployment.zip | |
--- Docker ---
## Run a container detached; be sure to use -d:
docker run -it -d shykes/pybuilder /bin/bash
## Attach to the running container:
docker exec -t -i amazing_elbakyan /bin/bash
## NOTE: the `> /tmp/logs.log` redirect below applies to the host shell (the `docker run -d` output), not to the process inside the container — the -v /tmp:/tmp mount is what shares the log path:
docker run -v /tmp:/tmp -it -d dashboard_validation_without_cmd xvfb-run -a node dashboard-validation/automation.js > /tmp/logs.log
ACCOUNT_ID = boto3.client('sts').get_caller_identity()['Account']
pip install -U --pre pipenv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment