Skip to content

Instantly share code, notes, and snippets.

@davidlenz
davidlenz / 20_newsgroup_to_csv.py
Last active March 4, 2023 15:09
20 newsgroup dataset from sklearn to csv.
from sklearn.datasets import fetch_20newsgroups
import pandas as pd
def twenty_newsgroup_to_csv():
newsgroups_train = fetch_20newsgroups(subset='train', remove=('headers', 'footers', 'quotes'))
df = pd.DataFrame([newsgroups_train.data, newsgroups_train.target.tolist()]).T
df.columns = ['text', 'target']
targets = pd.DataFrame( newsgroups_train.target_names)
@davidlenz
davidlenz / get_aws_batch_params.py
Created July 15, 2022 16:42
Get the parameters for job queues and job definitions
def get_job_queues(client):
"""
Get all job queues from aws batch service. Transform into dataframe and filter for valid ones.
extract the repo name from the job queue name. Transform into dict and return it.
:param client:
:return:
"""
jq = client.describe_job_queues()["jobQueues"]
jq = pd.DataFrame.from_records(jq)
@davidlenz
davidlenz / list.no-extension
Created July 10, 2022 15:57
list of aws instances that can be used with batch
Instance type can only be one of [m6g.xlarge, optimal, m5n, m3.xlarge, r4.16xlarge, r5a.2xlarge, m6gd.xlarge, c5a.2xlarge, c6gd.12xlarge, r5b.16xlarge, m5.large, m6g.2xlarge, m5dn.2xlarge, m6g, m6i, g4dn.2xlarge, i3en.6xlarge, r6g.xlarge, c3, c4, c5ad.xlarge, c5, m6gd, m6gd.12xlarge, i2.xlarge, m5d.12xlarge, m5.metal, m4.4xlarge, inf1, m6g.12xlarge, r5n.4xlarge, c4.large, c5d.2xlarge, z1d, d2, d3, r5d.2xlarge, r5.xlarge, r5b.24xlarge, c4.4xlarge, r6gd.12xlarge, c6gd.2xlarge, m5.8xlarge, c5n.9xlarge, c5n.2xlarge, m6i.24xlarge, r4.2xlarge, c6gn.4xlarge, m5zn.6xlarge, m5d.4xlarge, c6gn, r5ad.xlarge, c6gd, c6g.12xlarge, r5b.xlarge, c5.12xlarge, m6gd.large, m6i.8xlarge, f1, r5n.2xlarge, d3.xlarge, x1.16xlarge, r5n.12xlarge, m5a.4xlarge, g4ad.8xlarge, c5n.4xlarge, g2, g3, i2.8xlarge, m5dn.4xlarge, x1e, m5.24xlarge, g4dn.12xlarge, r5d.12xlarge, m3.2xlarge, m5ad.8xlarge, i3en.3xlarge, c5d.12xlarge, r5a.4xlarge, m5n.xlarge, c6gn.medium, i3.large, c6gd.large, m3.medium, c5a.12xlarge, c5n.xlarge, r5ad.16xlarge, i3.metal
@davidlenz
davidlenz / get_aws_spot_price_history.py
Created July 5, 2022 18:55
get spot price history
import pandas as pd
from datetime import datetime
from datetime import timedelta
# NOTE(review): `boto3` is used here, but no `import boto3` appears among the
# imports visible before this point — confirm it is imported before these lines run.
# Create an EC2 client and an EC2 resource handle through boto3.
ec2c = boto3.client('ec2')
ec2r = boto3.resource('ec2')
#### The rest of this code maps the instance details to spot price in case you are looking for certain memory or cpu
# Get a paginator for the `describe_instance_types` operation and start
# paginating without passing any arguments.
paginator = ec2c.get_paginator('describe_instance_types')
response_iterator = paginator.paginate( )
@davidlenz
davidlenz / cancel jobs in aws batch job queue
Last active February 25, 2022 17:41
cancel all jobs in a job queue
import time
import boto3
job_queue = <'job-queue-name'>
client=boto3.client("batch")
states = ['RUNNABLE','SUBMITTED','PENDING','STARTING','RUNNING']
for state in states:
print(state)
@davidlenz
davidlenz / jensen-shannon-divergence.py
Last active December 5, 2020 07:05
Implementation of Jensen-Shannon-Divergence based on https://github.com/scipy/scipy/issues/8244
import numpy as np
from scipy.stats import entropy
def js(p, q):
p = np.asarray(p)
q = np.asarray(q)
# normalize
p /= p.sum()
q /= q.sum()
m = (p + q) / 2
@davidlenz
davidlenz / heise_scraper.py
Last active August 6, 2019 18:12
Scrape the heise newsticker archive (https://www.heise.de/newsticker/archiv) using BeautifulSoup.
import requests
import bs4 as bs
from bs4 import BeautifulSoup
import pandas as pd
import os
def get_timestamp():
    """Return the current local date and time as a formatted string.

    The result has the shape ``YYYY-MM-DD HH-MM-SS``; note that the time
    fields are separated by hyphens, not colons.

    :return: the formatted timestamp string
    """
    # Imported inside the function, as in the original, so the function does
    # not rely on any module-level import being present.
    import datetime

    # datetime.now() without a tz argument yields the same naive local time
    # as datetime.fromtimestamp(time.time()).
    return datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
@davidlenz
davidlenz / Export-Chocolatey.ps1
Created May 17, 2019 14:16 — forked from alimbada/Export-Chocolatey.ps1
Export installed Chocolatey packages as packages.config - thanks to Matty666
# Export-Chocolatey.ps1
# Writes the locally installed Chocolatey packages to standard output as a
# packages.config XML document.
#
# Usage:
#   Export-Chocolatey.ps1 > packages.config
# Install the listed packages later with:
#   choco install packages.config -y
Write-Output "<?xml version=`"1.0`" encoding=`"utf-8`"?>"
Write-Output "<packages>"
# Each line from choco is split at the first "|" into package id and version.
choco list -lo -r -y | ForEach-Object {
    $separatorIndex = $_.IndexOf("|")
    $packageId = $_.SubString(0, $separatorIndex)
    $packageVersion = $_.SubString($separatorIndex + 1)
    " <package id=`"$packageId`" version=`"$packageVersion`" />"
}
Write-Output "</packages>"
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
url matching regex
http://daringfireball.net/2010/07/improved_regex_for_matching_urls
"""
"""
The regex patterns in this gist are intended to match any URLs,
@davidlenz
davidlenz / sendmails.py
Created July 25, 2018 09:40
Send Emails using python.
#!/usr/bin/env python
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from smtplib import SMTP
import smtplib
subject = 'Example header'
message = 'Subject: Happy Australia Day!\nHi Everyone! Happy Australia Day! Cheers, Julian'