Skip to content

Instantly share code, notes, and snippets.

@yhilpisch
Last active October 3, 2017 14:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save yhilpisch/64aeedbeb334c89aa05529a79881ceeb to your computer and use it in GitHub Desktop.
Save yhilpisch/64aeedbeb334c89aa05529a79881ceeb to your computer and use it in GitHub Desktop.
Gist with additional files from For Python Quants Bootcamp, May 2017, New York City
3 + 4
3 * 4
3 / 4
type(3)
type(4)
3 ** 4
sqrt(3)
3 ** 0.5
import math
math.sqrt(3)
print("Python.")
a = 3
b = 0.75
c = 'Python.'
d = "He said:'I am late.'"
d
d = "He said:"I am late.""
d = 'He said:"I am late."'
d
a
print(a)
a
b
a * b
a ** b
d
2 * d
d + d
d + d * 2
d / d
d / 2
d
d[0]
d[1]
len(d)
d[20]
d[19]
d[-1]
d[-2]
d[-20]
d[-21]
d[2]
d[:2]
d[:2] + d[2:]
d[2:]
d[2:7]
d[2:7:2]
d[::2]
d[::-1]
range(10)
type(range(10))
for i in range(10):
print('For Python Quants')
for i in range(10):
print(i)
for i in range(10):
print(i ** 2)
%magic
%lsmagic
%hist
%hist?
%history?
len?
for i in range(10):
print(d[i])
for c in d:
print(c)
for _ in d:
print(_)
c
for _ in d:
print(_, end='')
for _ in d:
print(_, end='|')
for x in range(10):
print(x)
for x in range(10):
print(x ** 2)
l = [x for x in range(10)]
l
l = [x ** 2 for x in range(10)]
l
type(l)
l2 = [x ** 2 for x in range(10) if x > 2]
l2
l2 = [x ** 2 for x in range(10) if (x > 2) and (x < 8)]
l2
l[0]
l[:5]
l[5:]
l[::-1]
l = [x ** 2 for x in range(10)]
10 % 2
11 % 2
l = [x ** 2 for x in range(10) if x % 2 == 0]
l
l = [x for x in range(20) if x % 2 == 0]
l
for x in range(20):
for y in range(10, 50):
if x % 2 == 0:
# then do something
pass
def f(x):
    ''' Returns the square of x.

    Parameters
    ==========
    x: number (or any object supporting the ** operator)
        value to be squared
    '''
    return x ** 2
f
f(10)
f(10.5)
l = [f(x) for x in range(20) if x % 2 == 0]
l
l3 = [5, 'fpq', a, l]
l3
l3.append('this is new')
l3
l3.append(f)
l3
l3[-1](5)
l.append('new')
l
l3
l
l3
def is_prime(I):
    ''' Checks by brute-force trial division whether I is a prime number.

    Parameters
    ==========
    I: int
        number to be tested

    Returns
    =======
    bool
        True if I is prime, False otherwise
    '''
    # 0, 1 and negative numbers are not prime; without this guard the
    # loop below would not run at all and True would be returned
    if I < 2:
        return False
    # try every candidate divisor from 2 up to I - 1
    for i in range(2, I):
        if I % i == 0:
            return False
    return True
is_prime(8)
is_prime(10)
is_prime(11)
is_prime(13)
l = [is_prime(x) for x in range(2, 101)]
l
l = [is_prime(x) for x in range(2, 20)]
l
class MyClass(object):
pass
class my_class(object):
pass
int(Ture)
int(True)
int(False)
while True:
print('hi')
while 2:
print('hi')
2 == 2
True == 2
True == 1
def is_prime_2(I):
    ''' Checks by trial division up to the square root of I
    whether I is a prime number.

    Parameters
    ==========
    I: int
        number to be tested

    Returns
    =======
    bool
        True if I is prime, False otherwise
    '''
    # 0, 1 and negative numbers are not prime
    if I < 2:
        return False
    # range() only accepts integers, so the float square root must be
    # truncated; the + 1 keeps the root itself among the candidates
    for i in range(2, int(I ** 0.5) + 1):
        if I % i == 0:
            return False
    return True
is_prime_2(10)
def is_prime_2(I):
    ''' Checks by trial division up to the square root of I
    whether I is a prime number.

    Parameters
    ==========
    I: int
        number to be tested

    Returns
    =======
    bool
        True if I is prime, False otherwise
    '''
    # 0, 1 and negative numbers are not prime
    if I < 2:
        return False
    # the upper bound of range() is exclusive; without the + 1 the
    # integer square root is never tested and squares of primes
    # (4, 9, 25, ...) would wrongly be reported as prime
    for i in range(2, int(I ** 0.5) + 1):
        if I % i == 0:
            return False
    return True
int(2.3)
int(2.7)
def is_prime_2(I):
    ''' Checks by trial division up to the square root of I
    whether I is a prime number.

    Parameters
    ==========
    I: int
        number to be tested

    Returns
    =======
    bool
        True if I is prime, False otherwise
    '''
    # 0, 1 and negative numbers are not prime; without this guard the
    # loop below would not run at all and True would be returned
    if I < 2:
        return False
    # a composite number has a divisor not larger than its square root;
    # + 1 because the upper bound of range() is exclusive
    for i in range(2, int(I ** 0.5) + 1):
        if I % i == 0:
            return False
    return True
is_prime_2(10)
is_prime_2(11)
%ed
p1 = int(1e8 + 1)
p2 = int(1e8 + 3)
p1
p2
is_prime(p1)
is_prime(p2)
p2 = 2** 17 − 1
p2 = 2 ** 17 - 1
p2
p2 = 2 ** 31 - 1
p2
%time is_prime(p1)
%time is_prime(p2)
p2 = 2 ** 17 - 1
%time is_prime(p2)
%time is_prime_2(p2)
%time is_prime_2(int(2**31 - 1))
def is_prime_3(I):
    ''' Checks by trial division with odd candidates only, up to the
    square root of I, whether I is a prime number.

    Parameters
    ==========
    I: int
        number to be tested

    Returns
    =======
    bool
        True if I is prime, False otherwise
    '''
    # 0, 1 and negative numbers are not prime
    if I < 2:
        return False
    # 2 is the only even prime; every other even number is composite
    if I % 2 == 0:
        return I == 2
    # I is odd here, so only odd divisors need to be tried;
    # + 1 because the upper bound of range() is exclusive
    for i in range(3, int(I ** 0.5) + 1, 2):
        if I % i == 0:
            return False
    return True
%time is_prime_3(int(2**31 - 1))
%ed is_prime_3
%ed -p
from math import sqrt
sqrt(4)
ls
cd ..
ls
cd bc
!mkdir bc
cd bc/
%hist -f bc_day_1_section_02
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"http://hilpisch.com/tpq_logo.png\" width=\"350px\">"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# For Python Quants Bootcamp"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Day 2**\n",
"\n",
"Yves Hilpisch\n",
"\n",
"The Python Quants GmbH"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Science Case"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pwd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"filename = '../data/Titanic.csv'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# !cat $filename"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pure Python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = []\n",
"with open(filename) as f:\n",
" for line in f:\n",
" # print(line)\n",
" data.append(line)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data[:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"s = data[3]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s.upper()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s.replace('\"', '')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s.replace('\"', '')[:-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sr = s.replace('\"', '')[:-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sr"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sr.split(',')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"titanic = [s.replace('\"', '')[:-1].split(',') for s in data[1:]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"titanic[:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum([1, 2, 3, 4])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"[int(row[-1]) for row in titanic][:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum([int(row[-1]) for row in titanic])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum([int(row[-1]) for row in titanic if row[2] == 'Female'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum([int(row[-1]) for row in titanic if row[2] == 'Male'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## csv module"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"csv_reader = csv.reader(open(filename))\n",
"raw = []\n",
"for line in csv_reader:\n",
" # print(line)\n",
" raw.append(tuple(line))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw[:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"csv_dict = csv.DictReader(open(filename))\n",
"raw_dict = [line for line in csv_dict]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw_dict[:3]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum([int(row['Freq']) for row in raw_dict])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum([int(row['Freq']) for row in raw_dict\n",
" if row['Class'] == '3rd'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SQLite3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sqlite3 as sq3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"con = sq3.connect('Titanic.sql')"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"('4', 'Crew', 'Male', 'Child', 'No', '0')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"query = '''\n",
"CREATE TABLE titanic (id int, class str, sex str, age str,\n",
"survived str, freq int)\n",
"'''\n",
"# con.execute(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"q = con.execute"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"q('SELECT * FROM sqlite_master').fetchall()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"q('SELECT * FROM titanic').fetchall()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"con.executemany('INSERT INTO titanic VALUES (?, ?, ?, ?, ?, ?)',\n",
" raw[1:])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"q('SELECT * FROM titanic').fetchmany(6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"q('SELECT * FROM titanic WHERE survived == \"Yes\"').fetchall()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"q('SELECT Sum(freq) FROM titanic WHERE survived == \"Yes\"').fetchall()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"q('SELECT Sum(freq) FROM titanic WHERE survived == \"No\"').fetchall()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NumPy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time l = list(range(1000000))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"l[100:110]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time sum(l)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time a = np.arange(1000000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time np.sum(a)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import math"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time sum([math.sqrt(x) for x in l])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"array = np.arange(10)\n",
"np.sqrt(array).round(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time np.sum(np.sqrt(a))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"array * 2 + 3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"array.sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"array.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"array.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"array.max()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"matrix = np.arange(15).reshape((5, 3))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"matrix.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"matrix.mean(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"matrix.mean(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def f(x):\n",
" return 3 * x + 0.5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f(1.5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f(matrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"math.sqrt(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# math.sqrt(array)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.sqrt(array)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.sqrt(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time sum([math.sqrt(x) for x in range(1000000)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time sum([np.sqrt(x) for x in range(1000000)])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## European Option Pricing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rn = np.random.random((1000, 2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rn[:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pylab import plt\n",
"plt.style.use('ggplot')\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# plt.style.available"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.plot(rn[:, 0], rn[:, 1], 'b.');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sn = np.random.standard_normal((1000, 2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sn.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sn.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sn -= sn.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sn.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sn /= sn.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sn.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sn.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.plot(sn[:, 0], sn[:, 1], 'b.');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.hist(rn.flatten(), bins=35)\n",
"plt.ylabel('frequency')\n",
"plt.xlabel('values');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.hist(sn.flatten(), bins=35)\n",
"plt.ylabel('frequency')\n",
"plt.xlabel('values');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(10, 6))\n",
"plt.hist(sn.flatten(), bins=35, label='freq')\n",
"plt.axvline(sn.mean(), color='b', label='mean')\n",
"plt.axvline(-sn.std(), color='g', label='std')\n",
"plt.axvline(sn.std(), color='g')\n",
"plt.ylabel('frequency')\n",
"plt.legend(loc=0)\n",
"plt.xlabel('values')\n",
"plt.title('standard normally dist numbers')\n",
"plt.savefig('plot.pdf');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"np.random.seed(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rw = np.random.standard_normal((100, 5))\n",
"rw[0] = 0.0\n",
"rw = rw.cumsum(axis=0) + 100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rw.round(2)[:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(10, 6))\n",
"plt.plot(rw);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Black-Scholes difference equation for a static economy:\n",
"\n",
"$$\n",
"S_T = S_0 \\exp \\left(\\left(r - \\frac{1}{2} \\sigma^2\\right) T + \\sigma \\sqrt{T} z \\right)\n",
"$$\n",
"\n",
"Here, $z$ is a standard normally distributed random variable."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"S0 = 100\n",
"r = 0.05\n",
"sigma = 0.2\n",
"T = 1.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"z = np.random.standard_normal(10000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ST = S0 * np.exp((r - 0.5 * sigma ** 2) * T +\n",
" sigma * math.sqrt(T) * z)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"m = ST.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.hist(ST, bins=35)\n",
"plt.axvline(m, color='b');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"S0 * math.exp(r * T)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"h = np.maximum(ST - 105, 0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.hist(h, bins=35);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"C0 = math.exp(-r * T) * h.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Structured Array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"h.dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.array(raw).dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'int O O O O int'.split()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"dt = np.dtype({'names': raw[0],\n",
" 'formats': 'int O O O O int'.split()})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = np.array(raw[1:], dtype=dt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['Age'][:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['Freq'].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['Freq'].mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data[data['Sex'] == 'Female']['Freq'].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data[(data['Sex'] == 'Female') &\n",
" (data['Survived'] == 'Yes')]['Freq'].sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.read_csv(filename, index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"type(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['Freq'].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.groupby('Sex').sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.groupby(['Sex', 'Survived']).sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.columns.values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.groupby(list(df.columns.values[:-1])).sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Summary"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.read_csv(filename, index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.groupby(list(df.columns.values[:-1])).sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Other Formats"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df2 = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df3 = pd.DataFrame(raw)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df3.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df4 = pd.DataFrame(raw_dict)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df4.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df5 = pd.read_sql('SELECT * FROM titanic', con, index_col='id')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# pd.read_sql?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df5.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df5.to_excel('Titanic.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ls -a -n *.xlsx"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df6 = pd.read_excel('Titanic.xlsx', index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df6.to_csv()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Financial Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# should show 0.19.x\n",
"pd.__version__"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# old days: import pandas.io.data as web\n",
"from pandas_datareader import data as web"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = web.DataReader('AMZN', data_source='yahoo')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.tail(6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pylab import plt\n",
"plt.style.use('seaborn')\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['Adj Close'].plot(figsize=(10, 6));"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data['SMA1'] = data['Adj Close'].rolling(42).mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data['SMA2'] = data['Adj Close'].rolling(252).mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.tail().round(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data[['Adj Close', 'SMA1', 'SMA2']].plot(figsize=(10, 6))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data[['Adj Close', 'SMA1', 'SMA2']].dropna().plot(figsize=(10, 6))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.dropna(subset=['SMA1']).head().round(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['Close'].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data['Returns'] = np.log(data['Adj Close'] /\n",
" data['Adj Close'].shift(1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.head().round(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['Returns'].hist(bins=35);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['Returns'].mean() * 252"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['Returns'].std() * 252 ** 0.5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Mean-Variance Portfolio Theory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"symbols = ['AMZN', 'AAPL']\n",
"symbols.append('MSFT')\n",
"symbols.append('GLD')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = pd.DataFrame()\n",
"for sym in symbols:\n",
" data[sym] = web.DataReader(sym, data_source='yahoo')['Adj Close']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# np.exp(0.01 * np.arange(len(data)) / 252)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data['CASH'] = np.exp(0.01 * np.arange(len(data)) / 252)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rets = np.log(data / data.shift(1)).dropna()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rets.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rets.hist(bins=35, figsize=(10, 6));"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rets.mean() * 252"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rets.std() * 252 ** 0.5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rets.cov() * 252"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rets.corr()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"noa = len(symbols) + 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"phi = noa * [1 / noa]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"phi"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def mu(phi):\n",
" return np.dot(rets.mean() * 252, phi)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mu(phi)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"cov_mat = rets.cov() * 252\n",
"def vol(phi):\n",
" return np.dot(phi, np.dot(cov_mat, phi)) ** 0.5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vol(phi)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"phi = np.random.random((15000, noa))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"phi = (phi.T / phi.sum(axis=1)).T"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"phi[:5].round(3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"mv = np.array([(vol(p), mu(p)) for p in phi])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mv[:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.plot(mv[:, 0], mv[:, 1], 'bo');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"mvdf = pd.DataFrame(mv, columns=['vol', 'mu'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mvdf.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mvdf.plot(x='vol', y='mu', kind='scatter', figsize=(10, 6));"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# bnds = ((0, 1), (0, 1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"cons = ({'type': 'eq', 'fun': lambda phi: phi.sum() - 1})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from scipy.optimize import minimize"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"res = minimize(vol, noa * [1 / noa],\n",
" constraints=cons)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"mvret = mu(res['x'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mvret"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mvdf.plot(x='vol', y='mu', kind='scatter', figsize=(10, 6));\n",
"plt.axvline(res['fun'], color='r')\n",
"plt.axhline(mvret, color='r');"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"http://hilpisch.com/tpq_logo.png\" width=\"350px\">"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#
# Tick Data Client
# with ZeroMQ
#
import zmq
import datetime
# subscriber socket connected to the local tick data publisher
context = zmq.Context()
socket = context.socket(zmq.SUB)
socket.connect('tcp://127.0.0.1:5555')
# an empty prefix subscribes to every message on the feed
socket.setsockopt_string(zmq.SUBSCRIBE, '')

# receive ticks forever and print each one with its local arrival time
while True:
    tick = socket.recv_string()
    received = datetime.datetime.now()
    print(received, tick, sep=' | ')
#
# Tick Data Client
# with ZeroMQ and Plotly streaming
#
import zmq
import datetime
import plotly.plotly as ply
import plotly.tools as tls
from plotly.graph_objs import *
# streaming tokens are read from the local Plotly credentials file
stream_ids = tls.get_credentials_file()['stream_ids']

# socket: subscribe to every message of the local tick data publisher
context = zmq.Context()
socket = context.socket(zmq.SUB)
socket.connect('tcp://127.0.0.1:5555')
socket.setsockopt_string(zmq.SUBSCRIBE, '')

# plotting: one scatter trace bound to the first streaming token
s = Stream(maxpoints=100, token=stream_ids[0])
t = Scatter(x=[], y=[], name='tick data', mode='lines+markers', stream=s)
d = Data([t])
l = Layout(title='Bootcamp Tick Data')
f = Figure(data=d, layout=l)
ply.plot(f, filename='fpq_bootcamp', auto_open=True)

st = ply.Stream(stream_ids[0])
st.open()
try:
    while True:
        msg = socket.recv_string()
        # separate name so the Scatter trace `t` above is not overwritten
        now = datetime.datetime.now()
        # messages look like '<symbol> <value>'; only the value is plotted
        _, value = msg.split()
        print(str(now) + ' | ' + msg)
        st.write({'x': now, 'y': float(value)})
finally:
    # make sure the stream is closed when the loop is interrupted
    # (e.g. Ctrl+C) or a malformed message raises
    st.close()
#
# Tick Data Server
# with ZeroMQ
#
import zmq
import time
import random
# publisher socket bound to the local port the clients connect to
context = zmq.Context()
socket = context.socket(zmq.PUB)
socket.bind('tcp://127.0.0.1:5555')

# starting value of the simulated price
AMZN = 100.

# publish a new simulated tick after a random pause of up to two seconds
while True:
    AMZN = AMZN + random.gauss(0, 1) * 0.5
    msg = 'AMZN {}'.format(AMZN)
    socket.send_string(msg)
    print(msg)
    pause = random.random() * 2
    time.sleep(pause)
#
# tpqoa is a wrapper class for the
# Oanda v20 API (RESTful & streaming)
# (c) Dr. Yves J. Hilpisch
# The Python Quants GmbH
#
import v20
import pandas as pd
import datetime as dt
import configparser
class tpqoa(object):
    ''' tpqoa is a Python wrapper class for the Oanda v20 API. '''

    # one-letter price component codes of the candles request and the key
    # under which a candle carries the matching open/high/low/close values;
    # the order decides which component is flattened if several are requested
    _PRICE_KEYS = (('A', 'ask'), ('B', 'bid'), ('M', 'mid'))

    def __init__(self, conf_file):
        ''' Init function expecting a configuration file with
        the following content:

        [oanda_v20]
        account_id = XYZ-ABC-...
        access_token = ZYXCAB...

        Parameters
        ==========
        conf_file: string
            path to and filename of the configuration file,
            e.g. '/home/me/oanda.cfg'
        '''
        self.config = configparser.ConfigParser()
        self.config.read(conf_file)
        self.access_token = self.config['oanda_v20']['access_token']
        self.account_id = self.config['oanda_v20']['account_id']
        # both contexts share every setting except the hostname
        common = dict(port=443,
                      ssl=True,
                      application='sample_code',
                      token=self.access_token,
                      datetime_format='RFC3339')
        # context for regular requests
        self.ctx = v20.Context(
            hostname='api-fxpractice.oanda.com', **common)
        # context for the streaming requests
        self.ctx_stream = v20.Context(
            hostname='stream-fxpractice.oanda.com', **common)
        # fractional seconds and 'Z' appended by transform_datetime
        self.suffix = '.000000000Z'

    def get_instruments(self):
        ''' Retrieves and returns all instruments for the given account.

        Returns
        =======
        instruments: list
            list of (displayName, name) tuples
        '''
        resp = self.ctx.account.instruments(self.account_id)
        instruments = [ins.dict() for ins in resp.get('instruments')]
        return [(ins['displayName'], ins['name'])
                for ins in instruments]

    def transform_datetime(self, dt):
        ''' Transforms Python datetime object to string.

        Parameters
        ==========
        dt: datetime, str
            Python datetime object or a string that pandas can parse

        Returns
        =======
        string of the form 'YYYY-MM-DDTHH:MM:SS.fffffffffZ'
        '''
        if isinstance(dt, str):
            dt = pd.Timestamp(dt).to_pydatetime()
        stamp = dt.isoformat('T')
        if dt.microsecond:
            # isoformat() already ends in a six-digit fraction here, so
            # appending the full suffix would yield two fractional parts;
            # pad the existing fraction to nine digits instead
            return stamp + '000Z'
        return stamp + self.suffix

    def get_history(self, instrument, start, end,
                    granularity, price):
        ''' Retrieves historical data for instrument.

        Parameters
        ==========
        instrument: string
            valid instrument name
        start, end: datetime, str
            Python datetime or string objects for start and end
        granularity: string
            a string like 'S5', 'M1' or 'D'
        price: string
            one of 'A' (ask), 'B' (bid) or 'M' (mid)

        Returns
        =======
        data: pd.DataFrame
            pandas DataFrame object with data
            (the string 'No data available.' if no candles are returned)
        '''
        start = self.transform_datetime(start)
        end = self.transform_datetime(end)
        raw = self.ctx.instrument.candles(
            instrument=instrument,
            fromTime=start, toTime=end,
            granularity=granularity, price=price)
        raw = [cs.dict() for cs in raw.get('candles')]
        if not raw:
            return 'No data available.'
        for cs in raw:
            # lift the o/h/l/c values of the requested price component to
            # the top level of the candle so they become DataFrame columns
            for code, key in self._PRICE_KEYS:
                if code in price and key in cs:
                    cs.update(cs.pop(key))
                    break
        data = pd.DataFrame(raw)
        data['time'] = pd.to_datetime(data['time'])
        data = data.set_index('time')
        for col in list('ohlc'):
            data[col] = data[col].astype(float)
        return data

    def create_order(self, instrument, units):
        ''' Places order with Oanda and prints the order fill transaction.

        Parameters
        ==========
        instrument: string
            valid instrument name
        units: int
            number of units of instrument to be bought
            (positive int, eg 'units=50')
            or to be sold (negative int, eg 'units=-100')
        '''
        request = self.ctx.order.market(
            self.account_id,
            instrument=instrument,
            units=units,
        )
        order = request.get('orderFillTransaction')
        print('\n\n', order.dict(), '\n')

    def stream_data(self, instrument, stop=None):
        ''' Starts a real-time data stream.

        Parameters
        ==========
        instrument: string
            valid instrument name
        stop: int
            number of price ticks after which the stream is left;
            None (default) never stops
        '''
        self.stream_instrument = instrument
        self.ticks = 0
        response = self.ctx_stream.pricing.stream(
            self.account_id, snapshot=True,
            instruments=instrument)
        for msg_type, msg in response.parts():
            # any message type other than a price update is skipped
            if msg_type == 'pricing.Price':
                self.ticks += 1
                self.on_success(msg.time,
                                float(msg.bids[0].price),
                                float(msg.asks[0].price))
                if stop is not None and self.ticks >= stop:
                    break

    def on_success(self, time, bid, ask):
        ''' Method called when new data is retrieved.

        Prints time, bid and ask of the tick.
        '''
        print(time, bid, ask)

    def get_account_summary(self, detailed=False):
        ''' Returns summary data for Oanda account.

        Parameters
        ==========
        detailed: bool
            if True, the account is requested via account.get()
            instead of account.summary()

        Returns
        =======
        dict object with the account data
        '''
        if detailed is True:
            response = self.ctx.account.get(self.account_id)
        else:
            response = self.ctx.account.summary(self.account_id)
        raw = response.get('account')
        return raw.dict()

    def get_transactions(self, tid=0):
        ''' Retrieves and returns transactions data.

        Parameters
        ==========
        tid: int
            transaction id passed on to the transaction.since() request

        Returns
        =======
        transactions: list
            list of dict objects, one per transaction
        '''
        response = self.ctx.transaction.since(self.account_id, id=tid)
        transactions = response.get('transactions')
        return [t.dict() for t in transactions]

    def print_transactions(self, tid=0):
        ''' Prints basic transactions data.

        Parameters
        ==========
        tid: int
            transaction id passed on to get_transactions()
        '''
        templ = '%5s | %s | %9s | %12s'
        for trans in self.get_transactions(tid):
            # a transaction without an instrument or units entry is printed
            # with blanks in those columns instead of raising a KeyError
            print(templ % (trans['id'],
                           trans['time'],
                           trans.get('instrument', ''),
                           trans.get('units', '')))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment