Anders L. Hurum peakBreaker

## logsetup.py
#!/usr/bin/env python

"""Logging setup

To use this module, just import it to set up the logger. The setup should persist from import for the rest of the runtime.

"""

import logging

## my_callback.py

def train_model():
    # ... Define the model and training data etc here

    def my_callback(epoch, logs):
        """Append one JSON line with the epoch number and its logs to train_logs.json."""
        # The file is opened in append mode, so earlier records are kept;
        # clear it beforehand (e.g. in an on_train_begin callback) if a
        # fresh log per run is wanted.
        with open('train_logs.json', 'a') as log_file:
            log_file.write(json.dumps({'epoch': epoch, **logs}) + '\n')

## iam.tf
# Service account "sa", created with the account id "my-service-account".
resource "google_service_account" "sa" {
  account_id   = "my-service-account"
  display_name = "A service account that Jane can use, with some pubsub capabilities"
}

# Allow SA service account use of the default GCE account
# https://www.terraform.io/docs/providers/google/r/google_service_account_iam.html
resource "google_service_account_iam_member" "gce-default-account-iam" {
  service_account_id = data.google_compute_default_service_account.default.name
  role               = "roles/iam.serviceAccountUser"

## Arch.Dockerfile
# Arch Linux base image, prepared with build tooling and a non-root build user
FROM archlinux/base:latest

# Basic dependencies
# Refresh the package databases and upgrade installed packages first
RUN pacman -Syu --noconfirm
# Install the tools and the base-devel group without prompting
RUN pacman -S tar curl sudo grep base-devel --noconfirm
# Create the directory used for yay and make it the working directory
RUN mkdir -p /opt/yay/
WORKDIR /opt/yay/

# Add the builduser (yay cannot be installed as root); -m creates its home directory
RUN useradd builduser -m # Create the builduser

## analyzedocs.py
"""
From documents to clusters

This script runs through a list of docs and works out the groups the docs may belong to, using
cluster analysis and NMF, with TF-IDF for preprocessing.  These are some basic techniques for
unsupervised NLP which may be very handy.

"""

# For creating the data structure to process

## mytsne.py
# Import TSNE
from sklearn.manifold import TSNE


def run_tsne(samples):
    """Fit a TSNE model on ``samples`` and compute the transformed features.

    NOTE(review): in the visible part of this snippet the result is only
    bound to the local ``tsne_features`` and nothing is returned; the rest
    of the function appears to be cut off -- confirm against the full file.
    """
    # Create a TSNE instance: model
    model = TSNE(learning_rate=200)

    # Apply fit_transform to samples: tsne_features
    tsne_features = model.fit_transform(samples)

## postproc_scikit_sample.py
# Pred and prob arrays are numpy array outputs from a sklearn model:
#  - pred_array = model.predict(X).astype(int)
#  - prob_arr = model.predict_proba(X)
#
# Here we run the initial data through multiple models and structure the
# model output into a multilevel dataframe for probabilities and predictions
#
# Typically the next stage would be to enhance the labels of numerical results
# to strings/categories or similar, based on whatever we want, as well as providing
# the results to a database or something like that

## statutils.py
def ecdf(data):
    """Compute ECDF for a one-dimensional array of measurements.

    Very useful for graphical EDA

    NOTE(review): the visible part of this snippet only computes the number
    of points and the sorted x-values; the y-values and the return statement
    appear to be cut off. ``np`` is presumably numpy imported elsewhere --
    confirm against the full file.
    """
    # Number of data points: n
    n = len(data)

    # x-data for the ECDF: x (the measurements sorted in ascending order)
    x = np.sort(data)

## analyze.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                peakBreaker
                / analyze.md
            
            
              Last active
              February 7, 2020 17:00
            
              
                CodeAnalysis in Python
              
          
    Based on talk by James Powell - https://www.youtube.com/watch?v=mr2SE_drU5o
Static Analysis


cloc
find -iname '*.<type>' | xargs cat | sed -e 's/^[ \t]*//' | sort | uniq -c | sort -nr
Python:

from subprocess import check_output
files = check_output('find -iname *.<type>'.split())\


## get_filename.py
# Get filename, which can be used for script identification
filename_no_ext = path.splitext(path.basename(__file__))[0]
	#!/usr/bin/env python

	"""Logging setup

	To use this module, just import it to set up the logger. It should persist from import across the runtime

	"""

	import logging

	def train_model():
	# ... Define the model and training data etc here

	def my_callback(epoch, logs):
	# Optionally clear the file if it exists here? or use on_train_begin cb
	# Focus on the point here
	with open('train_logs.json', 'a') as l:
	data =json.dumps({'epoch': epoch, **logs})
	l.write(data + '\n')
	resource "google_service_account" "sa" {
	account_id = "my-service-account"
	display_name = "A service account that Jane can use, with some pubsub capabilities"
	}

	# Allow SA service account use of the default GCE account
	# https://www.terraform.io/docs/providers/google/r/google_service_account_iam.html
	resource "google_service_account_iam_member" "gce-default-account-iam" {
	service_account_id = data.google_compute_default_service_account.default.name
	role = "roles/iam.serviceAccountUser"
	FROM archlinux/base:latest

	# Basic dependencies
	RUN pacman -Syu --noconfirm
	RUN pacman -S tar curl sudo grep base-devel --noconfirm
	RUN mkdir -p /opt/yay/
	WORKDIR /opt/yay/

	# Add the builduser (cant install yay as root)
	RUN useradd builduser -m # Create the builduser
	"""
	From documents to clusters

	This script will run through a list of docs and process out the groups the docs may belong to using
	cluster analysis, NMF and TF*IDF for preprocessing. These are some basic techniques for unsupervised NLP
	which may be very handy.

	"""

	# For creating the data structure to process
	# Import TSNE
	from sklearn.manifold import TSNE


	def run_tsne(samples):
	# Create a TSNE instance: model
	model = TSNE(learning_rate=200)

	# Apply fit_transform to samples: tsne_features
	tsne_features = model.fit_transform(samples)
	# Pred and prob arrays are numpy array outputs from a sklearn model:
	# - pred_array = model.predict(X).astype(int)
	# - prob_arr = model.predict_proba(X)
	#
	# Here we run the initial data through multiple models and structure the
	# model output into a multilevel dataframe for probabilities and predictions
	#
	# Typically the next stage would be to enhance the labels of numerical results
	# to strings/categories or similar, based on whatever we want, as well as providing
	# the results to a database or something like that
	def ecdf(data):
	"""Compute ECDF for a one-dimensional array of measurements.

	Very useful for graphical EDA
	"""
	# Number of data points: n
	n = len(data)

	# x-data for the ECDF: x
	x = np.sort(data)
	# Get filename, which can be used for script identification
	filename_no_ext = path.splitext(path.basename(__file__))[0]