Skip to content

Instantly share code, notes, and snippets.

View avro-test.rs
use std::env;
use datafusion::arrow::util::pretty;
use datafusion::error::Result;
use datafusion::prelude::*;
/// This example demonstrates executing a simple query against an Arrow data source (Avro) and
/// fetching results
#[tokio::main]
View list-s3-custom-metadata.rs
use aws_sdk_s3::{Client, Endpoint, Error, Region};
use http::Uri;
use awaitgroup::WaitGroup;
#[tokio::main]
async fn main() -> Result<(), Error> {
let bucket = "joshuarobinson";
let endpoint = "http://10.62.64.200";
let prefix = "";
View s3-list-metadata.go
package main
import (
"fmt"
"net/url"
"os"
"runtime"
"strings"
"sync"
"time"
View s3-list-custom-metadata-process.py
#!/usr/bin/python3
import boto3
import multiprocessing
import sys
# `os` is needed for the environment lookups below; the imports above did not
# include it, so the original script failed with NameError at this point.
import os

# Hard-coded endpoint address used by this script; update it for your environment.
FB_DATAVIP='10.62.64.200'

# Credentials are read from the environment rather than embedded in the script.
# os.environ.get returns None when the variable is not set.
AWS_KEY = os.environ.get('AWS_KEY')
AWS_SECRET = os.environ.get('AWS_SECRET')
View gist:b28d42bd39150cc08c75d3819bbd7ab7
package main
import (
"fmt"
"os"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
@joshuarobinson
joshuarobinson / Dockerfile
Created November 22, 2021 09:49
spark-s3a-dockerfile
View Dockerfile
FROM openjdk:8-slim
# Variables that define which software versions to install.
ARG SPARK_VERSION
ARG HADOOP_VERSION=3.2.2
# Install necessary libraries for running Spark.
# Install curl for the build process; will remove later.
RUN apt-get update && apt-get install -y build-essential curl procps python python3 python3-pip python3-setuptools --no-install-recommends \
&& pip3 install wheel
View gist:e74ab2268cae9c5db692d78bdefff0ef
# AWS-CLI example to add custom-metadata to an existing object using the zero-copy API.
# The object is copied onto itself (--copy-source and --key name the same object) with
# --metadata-directive REPLACE so the supplied metadata replaces the existing metadata.
# Variable expansions are double-quoted so endpoints, bucket names and keys containing
# spaces or glob characters reach the CLI as single, unmodified arguments.
# NOTE(review): keys with special characters may also need URL-encoding in --copy-source — confirm against the AWS CLI docs.
aws --endpoint-url "$ENDPOINT" s3api copy-object --bucket "$BUCKET" --copy-source "$BUCKET/$KEY" --key "$KEY" --metadata '{"custom-thing":"123456"}' --metadata-directive REPLACE
# Golang code to get the contents of an object, very similar to Boto3
input := &s3.GetObjectInput{Bucket: bucketname, Key: &k}
result, err := input_svc.GetObject(input)
# Boto3 reference for CopyObject, which is a zero-copy operation on FlashBlade
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.copy_object
View s3meta-to-es.py
import boto3
from datetime import datetime
import os
import sys
import time
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import urllib3
View s3-undelete.py
#!/usr/bin/python3
import boto3
import sys
# Hard-coded endpoint override, update this for your use.
FB_DATAVIP='10.62.64.200'
if len(sys.argv) != 3:
print("Usage: {} bucketname key".format(sys.argv[0]))
View s3-rapid-list.go
package main
import (
"fmt"
"os"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)