Martina Pugliese martinapugliese

## classification_entropy_demonstration.py
# Imports
import pandas as pd
import numpy as np
from scipy.stats import entropy

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from matplotlib import pyplot as plt

## ref_es_queries.md

      
              1 file
            
          
              2 forks
            
          
              0 comments
            
          
              8 stars
            
          
                martinapugliese
                / ref_es_queries.md
            
            
              Last active
              August 14, 2023 08:08
            
              
                Sample Elasticsearch queries in Python, as reference.
              
          
    Collection of sample Elasticsearch queries

Use the Python client elasticsearch.
Connect to cluster (the client)

from elasticsearch import Elasticsearch

es_client = Elasticsearch() # local


## boto_dynamodb_methods.py
# Copyright (C) 2016 Martina Pugliese

from boto3 import resource
from boto3.dynamodb.conditions import Key

# The boto3 dynamoDB resource
dynamodb_resource = resource('dynamodb')


def get_table_metadata(table_name):

## printingclass.py
# Copyright (C) 2016 Martina Pugliese

# Imports
from datetime import datetime


# #################### ANSI Escape codes for terminal #########################

codes_dict = {

## string_builtins.py
# Copyright (C) 2016 Martina Pugliese


def run_methods():

    print '\n'

    print '* Count occurrences of substring in string'
    print 'Martina'.count('art')
    print 'Martina'.count('a')

## command_line.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                martinapugliese
                / command_line.md
            
            
              Last active
              October 15, 2018 16:33
            
          
    A collection of useful command line hacks (Unix)

Memory usage

MACOS

vm_stat is the command; the Perl one-liner below makes its output user-friendly (credit to the linked source).
vm_stat | perl -ne '/page size of (\d+)/ and $size=$1; /Pages\s+([^:]+)[^\d]+(\d+)/ and printf("%-16s % 16.2f Mi\n", "$1:", $2 * $size / 1048576);'


## python_libs_tricks.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                martinapugliese
                / python_libs_tricks.md
            
            
              Created
              May 5, 2018 08:44
            
          
    A collection of little libraries that help workflow

TQDM

Shows a progress bar in a notebook cell.
for i in tqdm(range(10), 'wasting time', unit='iterations wasted'):
    sleep(0.5)

  
## pyplot.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                martinapugliese
                / pyplot.md
            
            
              Last active
              March 27, 2018 16:44
            
          
    Pyplot reference stuff

Those things that I always forget how to do.
from matplotlib import pyplot as plt

Matplotlib styles


## pandas.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                martinapugliese
                / pandas.md
            
            
              Last active
              March 22, 2018 11:20
            
          
    Pandas reference things

df is a DataFrame.
Grouping df with multiple aggregation functions and dropping the hierarchical level

grouped_df = df.groupby(['colA', 'colB']) \
    .agg(
 {


## python_memory.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                martinapugliese
                / python_memory.md
            
            
              Last active
              February 22, 2018 07:26
            
          
    A reference on small Python things

Various lil' things


See the difference between `for key in d.keys()` and `for key in d`
On underscores in Python
Assigning more vars to same value
Various ways to uniquify a list, differing in speed and in whether they preserve order: here, and a follow-up with another suggestion (f7) here
Various ways, with timing benchmarks, to check whether a key is in a dict: here
	# Imports
	import pandas as pd
	import numpy as np
	from scipy.stats import entropy

	from sklearn.ensemble import RandomForestClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import classification_report

	from matplotlib import pyplot as plt
	# Copyright (C) 2016 Martina Pugliese

	from boto3 import resource
	from boto3.dynamodb.conditions import Key

	# The boto3 dynamoDB resource
	dynamodb_resource = resource('dynamodb')


	def get_table_metadata(table_name):
	# Copyright (C) 2016 Martina Pugliese

	# Imports
	from datetime import datetime


	# #################### ANSI Escape codes for terminal #########################

	codes_dict = {
	# Copyright (C) 2016 Martina Pugliese


	def run_methods():

	print '\n'

	print '* Count occurrences of substring in string'
	print 'Martina'.count('art')
	print 'Martina'.count('a')