Daniel Kapitan dkapitan

## import.py
import os
import configparser
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns

# function to clean column names,

## code_toggle.js
<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown

## data_munging.py
def date_lookup(s):
    """
    Convert a Series of date values to datetimes through a lookup table.

    http://stackoverflow.com/questions/29882573/pandas-slow-date-conversion
    This is an extremely fast approach to datetime parsing.
    For large data, the same dates are often repeated. Rather than
    re-parse these, we parse every unique value once and use the
    resulting lookup to convert all dates.

    Parameters
    ----------
    s : pandas.Series
        Values accepted by ``pd.to_datetime`` (typically date strings).

    Returns
    -------
    pandas.Series
        Same index as ``s``, each value replaced by its parsed datetime.
    """
    # The previous version passed a misspelled keyword
    # (``infer_datetime_formt``), which made pd.to_datetime raise TypeError
    # on every call. The correctly spelled option is deprecated since
    # pandas 2.0, so the keyword is simply dropped.
    dates = {date: pd.to_datetime(date) for date in s.unique()}
    return s.map(dates)

## to_python_string.py
def to_python_string(string):
    """Replace spaces with underscores and convert all characters to lowercase.

    Values that do not support these string operations (e.g. numbers or
    ``None``) are returned unchanged.
    """
    try:
        return string.replace(' ', '_').lower()
    except (AttributeError, ValueError, NameError, TypeError):
        # Objects without .replace()/.lower() raise AttributeError, which the
        # original tuple did not cover, so the intended pass-through never
        # happened for non-string input.
        return string

## clean_names.py
def clean_python_name(s):
    """
    https://gist.github.com/dkapitan/89ff20eeed38e6d9757fef9e09e23c3d

    Method to convert string to clean string for use
    in dataframe column names such :
        i) it complies to python 2.x object name standard:
           (letter|'_')(letter|digit|'_')
        ii) my preference to use lowercase and adhere
            to practice of case-insensitive column names for data

## test.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                dkapitan
                / test.ipynb
            
            
              Created
              November 12, 2017 20:12
            
              
                Testing static bokeh
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## pwe1.md

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                dkapitan
                / pwe1.md
            
            
              Last active
              January 8, 2018 09:14
            
              
                Python Weekly Exercise 1
              
          
    Hi, and welcome to the first installment of Weekly Python Exercise!  I'm excited to start this new cohort and hope that you are, too!
This week, we'll explore the built-in data types, seeing how we can store information in them, and then extract information from them, without having to create a new class.
The idea is that we want to organize a list of places to which someone has traveled. That is: We'll ask the user to enter, one at a time, a city and country to which they have traveled. The city and country should be separated by a comma. If there is no comma, then the user is given an error message, and given another chance. If the user enters a city-country combination, then this information is recorded, and then they're asked again.  Indeed, the user is asked again and again for a city-country combination, until they provide an empty response. When that happens, the questioning phase ends, and the reporting phase begins.
In the report, we'll want to see a list of all of the places visited, organized by c

  
## load_cbs_pc4.py
# download file from https://www.cbs.nl/nl-nl/dossier/nederland-regionaal/geografische%20data/wijk-en-buurtkaart-2017
import shapefile
import pandas as pd
from sqlalchemy import create_engine


# SQLAlchemy engine for the PostgreSQL database "opendata" on localhost:5432.
db = create_engine('postgresql://localhost:5432/opendata')
# Reader for the CBS PC4 2017 shapefile at a hard-coded local path.
# NOTE(review): `shapefile` is presumably the pyshp package — confirm.
sf = shapefile.Reader("/Users/dkapitan/opendata/CBS_PC4_2017/CBS_PC4_2017_v1.shp")

# Fetch the shapes held by the reader.
shapes = sf.shapes()

## ipy_repl.py
import os
import json
import socket
import threading

# Value of the SUBLIMEREPL_ACTIVATE_THIS environment variable, or None when
# it is not set.
# NOTE(review): presumably the path of a virtualenv activation script — confirm.
activate_this = os.environ.get("SUBLIMEREPL_ACTIVATE_THIS", None)
# turn off pager
os.environ['TERM'] = 'emacs'

if activate_this:

## add_column_groupby_transform.py
# Sample frame: two symbols, each with the same four dates.
df = pd.DataFrame(
    {
        'Date': [
            '2015-05-08', '2015-05-07', '2015-05-06', '2015-05-05',
            '2015-05-08', '2015-05-07', '2015-05-06', '2015-05-05',
        ],
        'Sym': ['aapl', 'aapl', 'aapl', 'aapl', 'aaww', 'aaww', 'aaww', 'aaww'],
        'Data2': [11, 8, 10, 15, 110, 60, 100, 40],
        'Data3': [5, 8, 6, 1, 50, 100, 60, 120],
    }
)

# Sum Data3 within each Date and write that per-date total back onto every
# row of the group as a new column.
df['Data4'] = df.groupby('Date')['Data3'].transform('sum')
df
# Out[74]:
#    Data2  Data3        Date   Sym  Data4
	import os
	import configparser
	from sqlalchemy import create_engine
	import pandas as pd
	import numpy as np
	import re
	import matplotlib.pyplot as plt
	import seaborn as sns

	# function to clean column names,
	<script>
	function code_toggle() {
	if (code_shown){
	$('div.input').hide('500');
	$('#toggleButton').val('Show Code')
	} else {
	$('div.input').show('500');
	$('#toggleButton').val('Hide Code')
	}
	code_shown = !code_shown
	def date_lookup(s):
	"""
	http://stackoverflow.com/questions/29882573/pandas-slow-date-conversion
	This is an extremely fast approach to datetime parsing.
	For large data, the same dates are often repeated. Rather than
	re-parse these, we store all unique dates, parse them, and
	use a lookup to convert all dates.
	"""
	dates = {date:pd.to_datetime(date, infer_datetime_formt=True) for date in s.unique()}
	return s.map(dates)
	def to_python_string(string):
	"""Replaces spaces with underscores, all characters lowercase"""
	try:
	return string.replace(' ', '_').lower()
	except (ValueError, NameError, TypeError):
	return string
	def clean_python_name(s):
	"""
	https://gist.github.com/dkapitan/89ff20eeed38e6d9757fef9e09e23c3d

	Method to convert string to clean string for use
	in dataframe column names such :
	i) it complies to python 2.x object name standard:
	(letter\|'_')(letter\|digit\|'_')
	ii) my preference to use lowercase and adhere
	to practice of case-insensitive column names for data
	# download file from https://www.cbs.nl/nl-nl/dossier/nederland-regionaal/geografische%20data/wijk-en-buurtkaart-2017
	import shapefile
	import pandas as pd
	from sqlalchemy import create_engine


	db = create_engine('postgresql://localhost:5432/opendata')
	sf = shapefile.Reader("/Users/dkapitan/opendata/CBS_PC4_2017/CBS_PC4_2017_v1.shp")

	shapes = sf.shapes()
	import os
	import json
	import socket
	import threading

	activate_this = os.environ.get("SUBLIMEREPL_ACTIVATE_THIS", None)
	# turn off pager
	os.environ['TERM'] = 'emacs'

	if activate_this:
	df = pd.DataFrame({
	'Date': ['2015-05-08', '2015-05-07', '2015-05-06', '2015-05-05', '2015-05-08', '2015-05-07', '2015-05-06', '2015-05-05'],
	'Sym': ['aapl', 'aapl', 'aapl', 'aapl', 'aaww', 'aaww', 'aaww', 'aaww'],
	'Data2': [11, 8, 10, 15, 110, 60, 100, 40],
	'Data3': [5, 8, 6, 1, 50, 100, 60, 120]})

	df['Data4'] = df['Data3'].groupby(df['Date']).transform('sum')
	df
	# Out[74]:
	# Data2 Data3 Date Sym Data4