lewiuberg/snippets.md

## snippets.md

      
    Raw
  

              snippets.md
            
          
    Useful Snippets 

Table of Contents 


Import
Setup

Matplotlib
Plotly Offline


Templates

Decorator


Misc

Check operating system
Check if path is a file or directory
Unpack list and return with Oxford comma
Combine two lists to a dictionary


Pandas

Word search in a DataFrame
Find missing values in a DataFrame
Find location of missing values in a DataFrame
Find location data from a DataFrame column
Change name of index or row items, or to datetime.


Comprehensions

List
Set
Dictionary


Import

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import chart_studio.plotly as py
import cufflinks as cf
import dash_core_components as dcc
import dash_html_components as html
import country_converter as coco
import geopy

from jupyter_dash import JupyterDash
from dash.dependencies import Input, Output
from plotly.offline import init_notebook_mode, download_plotlyjs, plot, iplot
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
Setup

Matplotlib

pd.set_option("display.max_rows", 85)
pd.set_option("display.max_columns", 85)
Plotly Offline

init_notebook_mode(connected=True)
cf.go_offline()
Templates

Decorator

import functools

def decorator(func):
    @functools.wraps(func)
    def wrapper_decorator(*args, **kwargs):
        # Do something before
        value = func(*args, **kwargs)
        # Do something after
        return value
    return wrapper_decorator
Misc

Check operating system

def os_check():
    from sys import platform

    if platform == "darwin":
        print("macOS")
    elif platform == "win32":
        print("Windows")
    elif platform == "linux" or platform == "linux2":
        print("Linux")
    else:
        print("Oops! Something went wrong.")
Check if path is a file or directory

def path_checker(path):
    from pathlib import Path
    path = Path(path)

    if path.exists():
        if path.is_dir():
            print(f"'{path}' is directory")
        else:
            if path.is_file():
                print(f"'{path}' is a file")
        return True

    else:
        print(f"'{path}' does not exist.")
        return False
Unpack list and return with Oxford comma

def unpack_list(lst):  # Oxford comma
    if not isinstance(lst, str):
        lst = [str(item) for item in lst]
    if len(lst) == 0:
        return
    if len(lst) == 1:
        return ", ".join(lst)
    if len(lst) == 2:
        return ", and ".join(lst) 
    else:
        first_part = lst[:-1]
        last_part = lst[-1]
        return ", ".join(first_part) + ", and " + last_part
Combine two lists to a dictionary

months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
months_alpha = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Des']
dictionary = [{'label': x,'value': y} for x, y in zip(months, months_alpha)]
Pandas

Word search in a DataFrame

def word_search(df, *words):
    if not words or len(words[0]) < 1:
        return

    col_count: int = 0
    sum_words: int = 0
    found_words: str = []

    if isinstance(words[0], str):
        words = [word for word in words]
    else:
        words = list(*words)
        print(words)

    for word in words:
        col_count = 0
        sum_word = 0
        for column in df:
            if df[column].dtype == object or df[column].dtype == str:
                col_count += 1
                sum_word += df[column].str.contains(f"^{word}$").sum()
                if df[column].str.contains(f"^{word}$").any():
                    if word not in found_words:
                        found_words.append(word)
        sum_words += sum_word
    if len(found_words) == 0:
        found_words = words
    print("Columns of dtype str or object:", col_count)
    print(
        f"Instances of {unpack_list(found_words)} in the dataframe: {sum_words}"
    )
Find missing values in a DataFrame

def find_missing_values(df):
    column_names = (df.columns[df.isnull().any() == True]).format()
    miss_columns = df.isna().any().sum()
    miss_values = df.isna().sum().sum()
    print(f"Instances of missing data: {miss_values}")
    print(f"Columns with missing data: {miss_columns}")
    print(f"Column names with missing data: {unpack_list(column_names)}")
Find location of missing values in a DataFrame

def missing_location(df):
    col_criteria = df.isnull().any(axis=0)
    miss_col = df[col_criteria.index[col_criteria]]

    miss_only = miss_col[miss_col.isnull().any(axis=1)]

    row_criteria = df.isnull().any(axis=1)
    miss_row = df[row_criteria]
    
    return miss_col, miss_row, miss_only
Find location data from a DataFrame column

def df_locatinon_data(df, search_col):
    from geopy.geocoders import Nominatim
    from geopy.extra.rate_limiter import RateLimiter
    geolocator = Nominatim(user_agent="my_geocoder")
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=.1)
    # Find the location.
    df['location'] = df[search_col].apply(geocode)
    # Extract point to its own columns.
    df['point'] = df['location'].apply(lambda loc: tuple(loc.point)
                                       if loc else None)
    # split point column into latitude, longitude and altitude columns.
    df[['latitude', 'longitude',
        'altitude']] = pd.DataFrame(df['point'].tolist(), index=df.index)

    return df
Change name of index or row items, or to datetime.

def replace_df_ax_name(df, find, replace_with="", axis=0):

    dff = df.copy()

    if axis == 1:  # <-- Columns
        dff = dff.T

    dff_row = dff.index.to_list()
    dff_dict = {i: dff_row[i] for i in range(len(dff_row))}

    change_index: list = []
    change_dict: dict = {}

    for i, v in dff_dict.items():
        if find in v:
            change_index.append(i)
            if replace_with == "d_to_datetime":
                v = pd.to_datetime(v)
            else:
                v = v.replace(find, replace_with)
            change_dict[i] = v

    dff_dict.update(change_dict)
    dff.index = list(dff_dict.values())

    if axis == 1:  # <-- Columns
        dff = dff.T

    return dff
Comprehensions

Real Python
List

squares = [i * i for i in range(10)]
sentence = 'the rocket came back from mars'
vowels = [i for i in sentence if i in 'aeiou']

def is_consonant(letter):
    vowels = 'aeiou'
    return letter.isalpha() and letter.lower() not in vowels
consonants = [i for i in sentence if is_consonant(i)]
original_prices = [1.25, -9.45, 10.22, 3.78, -5.92, 1.16]
prices = [i if i > 0 else 0 for i in original_prices]
Set

quote = "life, uh, finds a way"
unique_vowels = {i for i in quote if i in 'aeiou'}
Dictionary

squares = {i: i * i for i in range(10)}
squares