Skip to content

Instantly share code, notes, and snippets.

View slzdevsnp's full-sized avatar

sviatoslav zimine slzdevsnp

  • UK
View GitHub Profile
@slzdevsnp
slzdevsnp / gist:3808612
Last active August 12, 2019 13:07
[R extract field from list of lists] #R #functional
## Get a specific field from a list of lists:
## lapply() pulls the `name` element out of each entry of `mle`,
## and unlist() flattens the resulting list into a single vector.
names <- unlist ( lapply( mle, function(x){x$name} ))
@slzdevsnp
slzdevsnp / SingletonPattern.py
Created August 12, 2019 13:08
[Singleton pattern] #python #patterns
#!/usr/bin/env python
class OnlyOne:
"""delegate object creation to a private nested class"""
class __OnlyOne: #naming strting with __ user cannot access
def __init__(self,arg):
self.val = arg
def __str__(self):
@slzdevsnp
slzdevsnp / ShapeFactoryPattern.py
Created August 12, 2019 13:13
[Factory pattern] static method to return #python #pattern #iterator
#!/usr/bin/env python
# a simple static factory method
from __future__ import generators
import random
import math
class Shape(object): #base abstract class
@slzdevsnp
slzdevsnp / py_memory.py
Last active August 13, 2019 09:58
[python memory] checking memory #python #performance #memory
## case 1
>>> import pyarrow.parquet as pq
>>> dataset = pq.ParquetDataset('.')
>>> dataset.read_pandas()
#compare to case 2
>>> from glob import glob
>>> from fastparquet import ParquetFile
>>> paths = glob('*.parquet')
>>> pf = ParquetFile(paths)
@slzdevsnp
slzdevsnp / fastparquet.py
Last active August 13, 2019 12:49
[python data binary formats] fastparquet pyarrow avro #python #binary #fast #data
import timeit  # provides default_timer(), used below to time the read

import fastparquet
########################
# read 1 file #
########################
# Path of the single parquet file to load.
src_file='/tmp/file.pq'
# Open the file through fastparquet; the data is converted to pandas below.
pqf = fastparquet.ParquetFile(src_file)
# Start timestamp, taken immediately before the conversion to pandas.
s = timeit.default_timer()
# columns=None is passed explicitly -- presumably "load every column";
# confirm against the fastparquet documentation.
df = pqf.to_pandas(columns=None)
@slzdevsnp
slzdevsnp / factorial_recursive.py
Last active December 30, 2019 16:55
[python lang] #lang #core #string #list #reverse #split
def recur_factorial(n):
    """Return the factorial of ``n`` computed recursively.

    Args:
        n: A non-negative integer.

    Returns:
        ``n!``; by convention ``0!`` is 1.

    Raises:
        ValueError: If ``n`` is negative. Non-integer values also end up
            here, because repeatedly subtracting 1 skips past zero.
    """
    if n < 0:
        # Without this guard the recursion never reaches the base case and
        # only stops when the interpreter raises RecursionError.
        raise ValueError("n must be a non-negative integer")
    if n == 0:
        # Base case: 0! == 1. Using 0 (not 1) as the base case makes
        # recur_factorial(0) terminate as well.
        return 1
    return n * recur_factorial(n - 1)
@slzdevsnp
slzdevsnp / numpy_basics.py
Last active May 5, 2020 14:20
[numpy] #numeric #performance #matrix #repeat
import numpy as np
from pprint import pprint as pp
### create numpy array
mylist=[1,2,3,4]
# np.array() builds an array from the list's values; the result is bound to
# a separate name, so mylist keeps referring to the original list.
ar=np.array(mylist)
type(mylist) # still a list
type(ar) # type of the object returned by np.array()
@slzdevsnp
slzdevsnp / panda_basic.py
Last active September 27, 2019 09:33
[panda_basic] #loc #iloc #drop #filter #sort_values #csv
panda_basic.py
import pandas as pd
import numpy as np
from pprint import pprint as pp
# Create an example dataframe
# `data` maps each column name to the list of values for that column (six entries each).
data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy', 'Jason'],
'year': [2012, 2012, 2013, 2014, 2014, 2015],
'reports': [4, 24, 31, 2, 3, 0]}
# Label the six rows with explicit strings via the `index` argument.
df = pd.DataFrame(data, index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma', 'Moscow'])
@slzdevsnp
slzdevsnp / panda_nans.py
Last active August 18, 2019 11:12
[panda_nans] #nunique #unique #value_counts #isnull #any #dropna
import pandas as pd
import numpy as np
from pprint import pprint as pp
# Example data containing missing values: np.nan appears in 'name' and
# 'reports', and pd.NaT appears in the timestamp column 'born'.
data = {'name': ['Jason', 'Molly', 'Tina', np.nan, 'Amy', 'Jason'],
'year': [2012, 2012, 2013, 2014, 2014, 2015],
'born': [pd.NaT, pd.NaT, pd.Timestamp('1940-04-25'), pd.NaT, pd.Timestamp('1973-03-25'), pd.NaT],
'reports': [4, 24, 31, 2, 3, np.nan]}
# Label the six rows with explicit strings via the `index` argument.
df = pd.DataFrame(data, index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma', 'Moscow'])
@slzdevsnp
slzdevsnp / panda_ops_on_columns.py
Created August 14, 2019 15:22
[panda_ops_on_columns] #astype #to_date_time #apply #lambda
import pandas as pd
import numpy as np
import datetime
from pprint import pprint as pp
data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy', 'Jason'],
'year': [2012, 2012, 2013, 2014, 2014, 2015],
'week': ['2012-01-01','2012-01-01','2013-01-01','2014-01-01','2014-01-01','2015-01-01'],