Piotr Mlocek pimlock

## shardcalc.py
import sys

def factorial(n):
    """Return ``n!`` for a non-negative whole number ``n``.

    The product is accumulated iteratively, so large ``n`` does not exhaust
    the interpreter's recursion limit.

    Args:
        n: A non-negative whole number.

    Returns:
        The product ``1 * 2 * ... * n``; ``1`` when ``n`` is ``0``.

    Raises:
        ValueError: If ``n`` is negative or has a fractional part. Such
            inputs never reach the ``n == 0`` base case of the recursive
            definition.
    """
    if n < 0 or n % 1 != 0:
        raise ValueError(
            "factorial() requires a non-negative whole number, got %r" % (n,)
        )
    result = 1
    while n > 1:
        result *= n
        n -= 1
    return result

def choose(n, m):
    """Return the binomial coefficient C(n, m): ways to pick ``m`` of ``n``.

    Args:
        n: Size of the set being chosen from (non-negative integer).
        m: Number of items chosen (non-negative integer).

    Returns:
        ``n! / (m! * (n - m)!)`` as an exact integer, or ``0`` when
        ``m > n`` (there is no way to pick more items than exist).

    Raises:
        ValueError: If ``n`` or ``m`` is negative.
    """
    if n < 0 or m < 0:
        raise ValueError(
            "choose() requires non-negative arguments, got n=%r, m=%r" % (n, m)
        )
    if m > n:
        return 0
    # m! * (n - m)! always divides n! evenly, so floor division is exact and
    # keeps the result an int instead of a float that loses precision (or
    # overflows) for large inputs.
    return factorial(n) // (factorial(m) * factorial(n - m))

## gist:745a7fb8665dca2e6b10aabadb225786
==============================
building project: /home/travis/build/aws-samples/aws-cdk-examples/scripts/../python/ecs/cluster/requirements.txt
==============================
Requirement already satisfied: aws-cdk.core in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 1))
Requirement already satisfied: aws-cdk.aws_autoscaling in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 2))
Requirement already satisfied: aws-cdk.aws_ec2 in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 3))
Requirement already satisfied: aws-cdk.aws_ecs in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 4))
Requirement already satisfied: aws-cdk.aws-autoscaling-common in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 7))
Requirement already satisfied: publication>=0.0.3 in /tmp/.venv/lib/python3.6/site-packages (from aws-cdk.core->-r requirements.txt (line 1))
Requirement already satisfied: jsii~=0.20.7 in /tmp/.venv/lib/

## loading-data-into-dataframe.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              2 stars
            
          
                pimlock
                / loading-data-into-dataframe.ipynb
            
            
              Last active
              May 14, 2021 02:00
            
              
                loading-data-into-dataframe.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## pandas-supported-formats.py
import pandas as pd
# Fetch the pandas I/O user-guide page and parse every HTML table whose text
# matches "Format Type"; read_html returns a list of DataFrames.
df_list = pd.read_html(
  "https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html",
  match="Format Type"
)
# Work with the first matching table (assumed to be the supported-formats
# overview -- verify if the page layout changes).
df = df_list[0]
# A backslash cannot appear inside an f-string expression before Python 3.12,
# so the newline is bound to a name first.
nl = "\n"
print(f"Pandas can read data in {len(df)} formats: {nl + nl.join(df['Data Description'].tolist())}")

## sample-data.jsonl
{"Rank":1,"NOC":"United States (USA)","Gold":46}
{"Rank":2,"NOC":"Great Britain (GBR)","Gold":27}
{"Rank":3,"NOC":"China (CHN)","Gold":26}
...

## reading-jsonl.py
# Load a JSON Lines file into a DataFrame; lines=True makes pandas parse the
# file as one JSON object per line.
df = pd.read_json("2016-olympics-medals.jsonl", lines=True)
# Print a concise summary of the loaded DataFrame.
df.info()

## sample-data.json
{
  "Timestamp": "2021-05-11T14:38:10",
  "Countries": [
    {"Rank":1,"NOC":"United States (USA)","Gold":46},
    {"Rank":2,"NOC":"Great Britain (GBR)","Gold":27},
    {"Rank":3,"NOC":"China (CHN)","Gold":26},
    {"...more data"}
  ]
}

## loading-json.py
import json
# JSON text is UTF-8 by specification, so pass the encoding explicitly rather
# than relying on the platform's default locale encoding.
with open("2016-olympics-medals.json", encoding="utf-8") as f:
  data = json.load(f)

# Build the DataFrame from the list stored under the top-level "Countries"
# key, one row per list element.
df = pd.json_normalize(data, record_path="Countries")

## loading-excel.py
# Read the worksheet named "Medals" from the Excel workbook into a DataFrame.
df_excel = pd.read_excel("2016-olympics-medals.xls", sheet_name="Medals")
# Print a concise summary of the loaded DataFrame.
df_excel.info()

## reading-parquet.py
# Read the Parquet file into a DataFrame.
df_parquet = pd.read_parquet("2016-olympics-medals.snappy.parquet")
# Print a concise summary of the loaded DataFrame.
df_parquet.info()
	import sys

	def factorial(n):
		"""Return ``n!`` for a non-negative whole number ``n``.

		The product is accumulated iteratively, so large ``n`` does not
		exhaust the interpreter's recursion limit.

		Raises:
			ValueError: If ``n`` is negative or has a fractional part.
		"""
		if n < 0 or n % 1 != 0:
			raise ValueError(
				"factorial() requires a non-negative whole number, got %r" % (n,)
			)
		result = 1
		while n > 1:
			result *= n
			n -= 1
		return result

	def choose(n, m):
		"""Return the binomial coefficient C(n, m) as an exact integer.

		Returns ``0`` when ``m > n``.

		Raises:
			ValueError: If ``n`` or ``m`` is negative.
		"""
		if n < 0 or m < 0:
			raise ValueError(
				"choose() requires non-negative arguments, got n=%r, m=%r" % (n, m)
			)
		if m > n:
			return 0
		# m! * (n - m)! always divides n! evenly, so floor division is exact
		# and avoids the float rounding/overflow of true division.
		return factorial(n) // (factorial(m) * factorial(n - m))
	==============================
	building project: /home/travis/build/aws-samples/aws-cdk-examples/scripts/../python/ecs/cluster/requirements.txt
	==============================
	Requirement already satisfied: aws-cdk.core in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 1))
	Requirement already satisfied: aws-cdk.aws_autoscaling in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 2))
	Requirement already satisfied: aws-cdk.aws_ec2 in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 3))
	Requirement already satisfied: aws-cdk.aws_ecs in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 4))
	Requirement already satisfied: aws-cdk.aws-autoscaling-common in /tmp/.venv/lib/python3.6/site-packages (from -r requirements.txt (line 7))
	Requirement already satisfied: publication>=0.0.3 in /tmp/.venv/lib/python3.6/site-packages (from aws-cdk.core->-r requirements.txt (line 1))
	Requirement already satisfied: jsii~=0.20.7 in /tmp/.venv/lib/
	import pandas as pd
	# Fetch the pandas I/O user-guide page and parse every HTML table whose
	# text matches "Format Type"; read_html returns a list of DataFrames.
	df_list = pd.read_html(
		"https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html",
		match="Format Type"
	)
	# Work with the first matching table (assumed to be the supported-formats
	# overview -- verify if the page layout changes).
	df = df_list[0]
	# A backslash cannot appear inside an f-string expression before
	# Python 3.12, so the newline is bound to a name first.
	nl = "\n"
	print(f"Pandas can read data in {len(df)} formats: {nl + nl.join(df['Data Description'].tolist())}")
	{"Rank":1,"NOC":"United States (USA)","Gold":46}
	{"Rank":2,"NOC":"Great Britain (GBR)","Gold":27}
	{"Rank":3,"NOC":"China (CHN)","Gold":26}
	...
	# Load a JSON Lines file into a DataFrame; lines=True makes pandas parse
	# the file as one JSON object per line.
	df = pd.read_json("2016-olympics-medals.jsonl", lines=True)
	# Print a concise summary of the loaded DataFrame.
	df.info()
	{
	"Timestamp": "2021-05-11T14:38:10",
	"Countries": [
	{"Rank":1,"NOC":"United States (USA)","Gold":46},
	{"Rank":2,"NOC":"Great Britain (GBR)","Gold":27},
	{"Rank":3,"NOC":"China (CHN)","Gold":26},
	{"...more data"}
	]
	}
	import json
	# JSON text is UTF-8 by specification, so pass the encoding explicitly
	# rather than relying on the platform's default locale encoding.
	with open("2016-olympics-medals.json", encoding="utf-8") as f:
		data = json.load(f)

	# Build the DataFrame from the list stored under the top-level
	# "Countries" key, one row per list element.
	df = pd.json_normalize(data, record_path="Countries")
	# Read the worksheet named "Medals" from the Excel workbook into a DataFrame.
	df_excel = pd.read_excel("2016-olympics-medals.xls", sheet_name="Medals")
	# Print a concise summary of the loaded DataFrame.
	df_excel.info()
	# Read the Parquet file into a DataFrame.
	df_parquet = pd.read_parquet("2016-olympics-medals.snappy.parquet")
	# Print a concise summary of the loaded DataFrame.
	df_parquet.info()