Dheeraj Inampudi (dheerajinampudi)

@dheerajinampudi
dheerajinampudi / .py
Created April 23, 2019 05:49
Recursively creates the directory and does not raise an exception if the directory already exists
# Starting from Python 3.5, pathlib.Path.mkdir has an exist_ok flag:
from pathlib import Path

path = Path('/my/directory/filename.txt')
path.parent.mkdir(parents=True, exist_ok=True)  # path.parent ~ os.path.dirname(path)

# This recursively creates the directory and does not raise an exception if the
# directory already exists (just as os.makedirs gained an exist_ok flag in
# Python 3.2, e.g. os.makedirs(path, exist_ok=True)).
@dheerajinampudi
dheerajinampudi / .sh
Created April 23, 2019 10:55
Command to search inside an S3 bucket
I tried the following:
aws s3 ls s3://Bucket1/folder1/2019/ --recursive | grep filename.csv
This outputs the actual path where the file exists:
2019-04-05 01:18:35 111111 folder1/2019/03/20/filename.csv
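
The same search from Python, as a minimal boto3 sketch (the bucket and prefix are the names from the example above; pagination via list_objects_v2 is standard boto3):

import boto3

# List every object under the prefix and keep the keys that end with the
# target file name -- the Python counterpart of the CLI one-liner above.
s3 = boto3.client("s3")
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket="Bucket1", Prefix="folder1/2019/"):
    for obj in page.get("Contents", []):
        if obj["Key"].endswith("filename.csv"):
            print(obj["LastModified"], obj["Size"], obj["Key"])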
@dheerajinampudi
dheerajinampudi / pyspark_udf_filtering.py
Created May 8, 2019 09:46 — forked from samuelsmal/pyspark_udf_filtering.py
PySpark DataFrame filtering using a UDF and Regex
import re

from pyspark.sql.functions import udf
from pyspark.sql.types import BooleanType

def regex_filter(x):
    # Return True when x matches one of the regexes, case-insensitively.
    regexs = ['.*ALLYOURBASEBELONGTOUS.*']
    if x and x.strip():
        for r in regexs:
            if re.match(r, x, re.IGNORECASE):
                return True
    return False
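
The forked snippet stops at the predicate itself; a hedged sketch of the usual wiring (df and field_to_filter are assumed names, not part of the gist):

# Register the predicate as a boolean UDF and filter with it.
filter_udf = udf(regex_filter, BooleanType())
df_filtered = df.filter(filter_udf(df.field_to_filter))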
@dheerajinampudi
dheerajinampudi / PySpark DataFrame from many small pandas DataFrames.ipynb
Convert an RDD of pandas DataFrames to a single Spark DataFrame using Arrow and without collecting all data in the driver.
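The notebook itself no longer renders, so only the description above survives. As a hedged sketch of the general idea (not the author's notebook; its Arrow-specific machinery is lost), each pandas DataFrame can be flattened into plain rows on the executors so that nothing is ever collected on the driver. All names below are assumptions:

import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# pdf_rdd is assumed: an RDD whose elements are small pandas DataFrames.
def to_rows(pdf):
    # Turn one pandas DataFrame into plain row tuples on the executor.
    return (tuple(row) for row in pdf.itertuples(index=False, name=None))

rows = pdf_rdd.flatMap(to_rows)                        # stays distributed
sdf = spark.createDataFrame(rows, ["col_a", "col_b"])  # assumed column names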
@dheerajinampudi
dheerajinampudi / py
Last active May 18, 2019 11:42
Interchange two columns' order in pandas
# Interchange two columns' order in pandas
cols = df3.columns.tolist()
column_to_move = "Altitude"
new_position = 1

# Pop the column out of its old slot and re-insert it at the new position.
cols.insert(new_position, cols.pop(cols.index(column_to_move)))
df3 = df3[cols]
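
A self-contained run of the snippet, with assumed column names (not from the original gist):

import pandas as pd

df3 = pd.DataFrame([[12.97, 77.59, 920.0]],
                   columns=["Latitude", "Longitude", "Altitude"])
cols = df3.columns.tolist()
cols.insert(1, cols.pop(cols.index("Altitude")))
df3 = df3[cols]
print(df3.columns.tolist())  # ['Latitude', 'Altitude', 'Longitude']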
@dheerajinampudi
dheerajinampudi / pythonubuntu.txt
Last active January 4, 2021 00:59
Ubuntu 20.04 python to python3
sudo apt install python-is-python3
This installs /usr/bin/python as a symlink to /usr/bin/python3.
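
To confirm the symlink from Python afterwards (path taken from the note above):

import os

# After installing python-is-python3, this resolves to the python3 binary.
print(os.path.realpath("/usr/bin/python"))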
@dheerajinampudi
dheerajinampudi / cloudwatchpowertools_example.json
Last active February 21, 2021 05:48
CloudWatch powertools logging output example
{
  "timestamp": "2021-02-12 18:17:33,774",
  "level": "INFO",
  "location": "collect.handler:1",
  "service": "payment",
  "lambda_function_name": "test",
  "lambda_function_memory_size": 128,
  "lambda_function_arn": "arn:aws:lambda:eu-west-1:12345678910:function:test",
  "lambda_request_id": "52fdfc07-2182-154f-163f-5f0f9a621d72",
  "cold_start": true
}
@dheerajinampudi
dheerajinampudi / zipfile_extraction.py
Created March 30, 2021 17:04
Python script to extract a zip file
import zipfile

path_to_zip_file = 'archive.zip'
directory_to_extract_to = 'archive_unzipped_py/'

# Extract every member of the archive into the target directory.
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(directory_to_extract_to)
@dheerajinampudi
dheerajinampudi / os_walk_paths.py
Created March 30, 2021 17:07
File names in a directory tree
import os

# Walk the tree bottom-up (topdown=False) and print the full path of
# every file and directory; print(name) would give bare names instead.
for root, dirs, files in os.walk(".", topdown=False):
    for name in files:
        print(os.path.join(root, name))
    for name in dirs:
        print(os.path.join(root, name))
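
For comparison, the pathlib equivalent in a single call (files and directories interleaved):

from pathlib import Path

# rglob("*") yields every file and directory beneath the current directory.
for p in Path(".").rglob("*"):
    print(p)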
@dheerajinampudi
dheerajinampudi / s3-endpoint-diff.csv
Last active May 1, 2021 02:22
Difference between the S3 website endpoint and the REST API endpoint
Website Endpoint,REST API Endpoint
Bucket is publicly available,Only accessible via the CloudFront endpoint
Must be a public bucket,Need not be a public bucket
Less secure due to S3 global read access,More secure because of the OAI configuration
Users can access your files through CloudFront and the S3 bucket directly,"Users can only access your files through CloudFront, not directly from the S3 bucket"
"Makes auditing difficult, as buckets need the public access option ON at all times",Meets compliance by disabling public access to all buckets by default