@davidlenz
davidlenz / svg_to_pdf.py
Created April 24, 2018 15:32
SVG to PDF using svglib and reportlab
from svglib.svglib import svg2rlg
from reportlab.graphics import renderPDF
def svg_to_pdf(in_path, out_path):
    # render the SVG as a ReportLab drawing, then write it out as a PDF
    drawing = svg2rlg(in_path)
    renderPDF.drawToFile(drawing, out_path)
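A minimal usage example (the file names are placeholders):

svg_to_pdf("figure.svg", "figure.pdf")  # writes figure.pdf next to the input SVG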
@davidlenz
davidlenz / reddit_comment_stream_with_praw_example.py
Created April 24, 2018 16:22
Downloads all comments from the given subreddits and also extracts text from URLs found in the comments. Demonstrates usage of the Reddit PRAW module.
import newsapi_v2
import findurls
import praw
import pandas as pd
import utils_func
import os
import time
import subreddit
import requests
from newspaper import fulltext
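Only the import block survives in this preview. A minimal sketch of the streaming loop the description implies, with placeholder Reddit API credentials and subreddit names, and omitting the author's local helper modules (findurls, utils_func, subreddit):

import praw
import pandas as pd

reddit = praw.Reddit(client_id="CLIENT_ID",            # placeholder credentials
                     client_secret="CLIENT_SECRET",
                     user_agent="comment-stream-example")

rows = []
# stream new comments from a combined listing of subreddits
for comment in reddit.subreddit("news+worldnews").stream.comments():
    rows.append({"subreddit": str(comment.subreddit),
                 "author": str(comment.author),
                 "created_utc": comment.created_utc,
                 "body": comment.body})
    if len(rows) >= 100:                               # stop after a small batch
        break

df = pd.DataFrame(rows)
print(df.head())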
@davidlenz
davidlenz / binance_get_historical_klines.py
Created April 24, 2018 19:15 — forked from sammchardy/binance_get_historical_klines.py
Get historical Klines from Binance
# uses the date_to_milliseconds and interval_to_milliseconds functions
# https://gist.github.com/sammchardy/3547cfab1faf78e385b3fcb83ad86395
# https://gist.github.com/sammchardy/fcbb2b836d1f694f39bddd569d1c16fe
from binance.client import Client
import time
def get_historical_klines(symbol, interval, start_str, end_str=None):
"""Get Historical Klines from Binance
@davidlenz
davidlenz / scrape_newsapi.py
Last active April 26, 2018 13:26
Scrape the sources from the newsapi headers every 12 hours. https://newsapi.org/
import justext, time
import pandas as pd
import requests, urllib
import utils_func
def get_sources(key):
    """
    Retrieve all sources from NewsAPI, keep the German- and English-language
    ones, and return them as a DataFrame.
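The rest of the function is cut off in the preview; a minimal sketch of what it likely does, assuming the standard NewsAPI /v2/sources endpoint (the request parameters and the language filter are assumptions, not the author's exact code):

import pandas as pd
import requests

def get_sources(key):
    """Sketch: fetch all NewsAPI sources and keep the German/English ones."""
    resp = requests.get("https://newsapi.org/v2/sources", params={"apiKey": key})
    sources = pd.DataFrame(resp.json().get("sources", []))
    return sources[sources["language"].isin(["de", "en"])]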
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
url matching regex
http://daringfireball.net/2010/07/improved_regex_for_matching_urls
"""
"""
The regex patterns in this gist are intended to match any URLs,
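The pattern itself is not shown in the preview; as a stand-in, a deliberately simplified example of how such a regex is typically applied (the gist uses John Gruber's far more robust pattern linked above, not this one):

import re

# simplified URL pattern for illustration only -- not Gruber's regex
URL_RE = re.compile(r"https?://[^\s<>\"]+", re.IGNORECASE)

text = "see https://example.com/page and http://news.ycombinator.com"
print(URL_RE.findall(text))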
@davidlenz
davidlenz / reddit_submissions_stream.py
Last active April 25, 2018 16:20
Streams Reddit submissions with PRAW. Additionally finds URLs in submissions and extracts their text.
import newsapi_v2
import findurls
import praw
import pandas as pd
import utils_func
import os
import subreddit
import requests
from newspaper import fulltext
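As with the comment stream above, only the imports are visible; a hedged sketch of the submission loop and the URL-text extraction the description mentions (credentials and the subreddit are placeholders; the local helper modules are omitted):

import praw
import requests
from newspaper import fulltext

reddit = praw.Reddit(client_id="CLIENT_ID",            # placeholder credentials
                     client_secret="CLIENT_SECRET",
                     user_agent="submission-stream-example")

# stream new submissions and pull the article text behind link posts
for submission in reddit.subreddit("worldnews").stream.submissions():
    print(submission.title)
    if not submission.is_self:                         # link post -> fetch the page
        try:
            html = requests.get(submission.url, timeout=10).text
            print(fulltext(html)[:200])                # first 200 chars of the article
        except Exception as exc:                       # network or parsing failure
            print("could not extract text:", exc)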
@davidlenz
davidlenz / twitter_scraper.py
Last active April 25, 2018 17:39
Scrape data from Twitter and extract sentiment using VaderSentiment. Code is from https://www.pythonprogramming.net/
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import sqlite3
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from unidecode import unidecode
import time
analyzer = SentimentIntensityAnalyzer()
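The preview stops after the analyzer is created. The pythonprogramming.net tutorial the description cites continues roughly as follows; the credentials, the tracked keyword, and the SQLite schema here are placeholders, and the listener uses the tweepy 3.x StreamListener API imported above:

ckey, csecret, atoken, asecret = "CKEY", "CSECRET", "ATOKEN", "ASECRET"  # placeholders

conn = sqlite3.connect("twitter.db")
c = conn.cursor()
c.execute("CREATE TABLE IF NOT EXISTS sentiment (unix REAL, tweet TEXT, sentiment REAL)")
conn.commit()

class Listener(StreamListener):
    def on_data(self, data):
        tweet = unidecode(json.loads(data).get("text", ""))
        vs = analyzer.polarity_scores(tweet)["compound"]   # VADER compound score
        c.execute("INSERT INTO sentiment (unix, tweet, sentiment) VALUES (?, ?, ?)",
                  (time.time(), tweet, vs))
        conn.commit()
        return True

    def on_error(self, status):
        print(status)

auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
Stream(auth, Listener()).filter(track=["bitcoin"])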
@davidlenz
davidlenz / selenium_google_scrape.py
Created April 26, 2018 21:17
Search on Google and return list of results with urls. Tweaked from https://gist.github.com/azam-a/32b89944b98a3fd79d44ebfdac16b63d
# https://gist.github.com/azam-a/32b89944b98a3fd79d44ebfdac16b63d
import pandas as pd
import selenium
print('selenium.__version__: ', selenium.__version__)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
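Only the imports are shown; a sketch of the search-and-collect flow (the CSS selector for result links is an assumption, since Google's markup changes frequently, and a local chromedriver is assumed):

driver = webdriver.Chrome()                        # assumes chromedriver on PATH
driver.get("https://www.google.com")

box = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.NAME, "q")))
box.send_keys("svglib svg to pdf\n")               # arbitrary example query

WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "search")))
links = [a.get_attribute("href")
         for a in driver.find_elements(By.CSS_SELECTOR, "div.g a")
         if a.get_attribute("href")]
print(pd.DataFrame(links, columns=["url"]))

driver.quit()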
@davidlenz
davidlenz / attention_lstm.py
Created April 27, 2018 10:07 — forked from mbollmann/attention_lstm.py
My attempt at creating an LSTM with attention in Keras
class AttentionLSTM(LSTM):
    """LSTM with attention mechanism

    This is an LSTM incorporating an attention mechanism into its hidden states.
    Currently, the context vector calculated from the attended vector is fed
    into the model's internal states, closely following the model by Xu et al.
    (2016, Sec. 3.1.2), using a soft attention model following
    Bahdanau et al. (2014).

    The layer expects two inputs instead of the usual one:
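Only the opening of the class docstring is visible here; a purely hypothetical wiring in the Keras functional API, assuming the layer keeps the standard LSTM constructor and accepts the attended tensor as a second input (all shapes are made up for illustration):

from keras.layers import Input
from keras.models import Model

seq_in = Input(shape=(20, 128))      # the "normal" sequence input
attended = Input(shape=(196, 512))   # the annotation vectors to attend over

h = AttentionLSTM(64)([seq_in, attended])
model = Model(inputs=[seq_in, attended], outputs=h)
model.summary()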
@davidlenz
davidlenz / jensen-shannon-divergence.py
Last active December 5, 2020 07:05
Implementation of Jensen-Shannon-Divergence based on https://github.com/scipy/scipy/issues/8244
import numpy as np
from scipy.stats import entropy

def js(p, q):
    p = np.asarray(p, dtype=float)   # cast to float so in-place division works
    q = np.asarray(q, dtype=float)
    # normalize to probability distributions
    p /= p.sum()
    q /= q.sum()
    m = (p + q) / 2
    # JSD is the mean of the two KL divergences against the midpoint m
    return (entropy(p, m) + entropy(q, m)) / 2
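A quick sanity check (scipy's entropy uses the natural log, so the result is in nats):

print(js([1, 0], [0.5, 0.5]))   # ≈ 0.216 nats; identical distributions give 0.0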