Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/python3
import asyncio
import boto3
import itertools
import string
import sys
import concurrent.futures
package main
import (
"fmt"
"os"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)
use aws_sdk_s3::{Client, Endpoint, Error, Region};
use http::Uri;
use awaitgroup::WaitGroup;
#[tokio::main]
async fn main() -> Result<(), Error> {
let bucket = "joshuarobinson";
let endpoint = "http://10.62.64.200";
let prefix = "";
@joshuarobinson
joshuarobinson / DownloadImagenet.ipynb
Last active October 26, 2022 03:05
Working PySpark notebook to retrieve imagenet URL list and parallelize downloads.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
use std::env;
use datafusion::arrow::util::pretty;
use datafusion::error::Result;
use datafusion::prelude::*;
/// This example demonstrates executing a simple query against an Arrow data source (Avro) and
/// fetching results
#[tokio::main]
package main
import (
"fmt"
"net/url"
"os"
"runtime"
"strings"
"sync"
"time"
#!/usr/bin/python3
import boto3
import multiprocessing
import sys
FB_DATAVIP='10.62.64.200'
AWS_KEY = os.environ.get('AWS_KEY')
AWS_SECRET = os.environ.get('AWS_SECRET')
FROM openjdk:8-slim
ARG HADOOP_VERSION=3.2.0
RUN apt-get update && apt-get install -y curl --no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
# Download and extract the Hadoop binary package.
RUN curl https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz \
| tar xvz -C /opt/ \
package main
import (
"fmt"
"os"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
@joshuarobinson
joshuarobinson / Dockerfile
Created November 22, 2021 09:49
spark-s3a-dockerfile
FROM openjdk:8-slim
# Variables that define which software versions to install.
ARG SPARK_VERSION
ARG HADOOP_VERSION=3.2.2
# Install necessary libraries for running Spark.
# Install curl for the build process; will remove later.
RUN apt-get update && apt-get install -y build-essential curl procps python python3 python3-pip python3-setuptools --no-install-recommends \
&& pip3 install wheel