View repartition_pyspark_dataframe.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.functions import monotonically_increasing_id, row_number | |
from pyspark.sql import Window | |
from functools import reduce | |
def partitionIt(size, num): | |
''' | |
Create a list of partition indices, each group of size num, where the number of groups is ceiling(size/num) | |
Args: | |
size (int): number of rows/elements |
View tableau_server_export.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tableauserverclient as TSC | |
import pandas as pd | |
from io import StringIO | |
class Tableau_Server(object): | |
"""docstring for ClassName""" | |
def __init__(self,username, password,site_id,url, https = False): | |
super().__init__() # http://stackoverflow.com/questions/576169/understanding-python-super-with-init-methods |
View visualforce_embed_with_user.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<apex:page > | |
<html> | |
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js"></script> | |
<!-- User Id in a span --> | |
<span id = 'user' style = 'display: none;'> | |
<apex:outputText label="Account Owner" value="{!$User.Id}"></apex:outputText> | |
</span> | |
<!-- Embed placeholder --> |
View reddit_posts_and_comments.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Reddit(): | |
def __init__(self,client_id, client_secret,user_agent='My agent'): | |
self.reddit = praw.Reddit(client_id=client_id, | |
client_secret=client_secret, | |
user_agent=user_agent) | |
def get_comments(self, submission): | |
# get comments information using the Post as a starting comment | |
comments = [RedditComment(author=submission.author, | |
commentid = submission.postid, |
View unnest_byseat.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyr) | |
setwd("~/Desktop/unnest") | |
fname = "file-name.csv" | |
df = read.csv(paste0(fname,'.csv'), stringsAsFactors = F) | |
df$seats = | |
sapply(1:nrow(df), function(x) { | |
seats = c(df[x,]$first_seat,df[x,]$last_seat) |
View stubhub_inventory_v2.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import base64 | |
import pprint | |
import pandas as pd | |
import json | |
from tqdm import tqdm | |
# https://stubhubapi.zendesk.com/hc/en-us/articles/220922687-Inventory-Search |
View hmtl_table_parser.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://srome.github.io/Parsing-HTML-Tables-in-Python-with-BeautifulSoup-and-pandas/ | |
class HTMLTableParser:
    """HTML table parsing helpers."""

    @staticmethod
    def get_element(node):
        """Return the CSS selector step identifying ``node`` among its siblings.

        Args:
            node: A parsed element exposing ``name`` and ``previous_siblings``
                (presumably a BeautifulSoup ``Tag`` -- confirm against callers).

        Returns:
            str: ``"<name>:nth-child(<k>)"`` when ``node`` is the k-th element
            among its siblings and k > 1, otherwise just ``node.name``.
        """
        # CSS ``:nth-child`` counts element siblings only. Text and comment
        # nodes (whose ``name`` is missing or None) sit between tags whenever
        # the markup contains whitespace, so they must not shift the index.
        position = 1 + sum(
            1
            for sibling in node.previous_siblings
            if getattr(sibling, "name", None) is not None
        )
        if position > 1:
            return '%s:nth-child(%s)' % (node.name, position)
        return node.name
View Large dataframe to csv in chunks in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df = read.csv("your-df.csv") | |
# Number of items in each chunk | |
elements_per_chunk = 100000 | |
# List of vectors [1] 1:100000, [2] 100001:200000, ... | |
l = split(1:nrow(df), ceiling(seq_along(1:nrow(df))/elements_per_chunk)) | |
# Write large data frame to csv in chunks | |
fname = "inventory-cleaned.csv" |
View reddit-posts.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- Produces a responsive list of the top ten posts from the /r/worldnews subreddit. Working jsfiddle: http://jsfiddle.net/KobaKhit/t42zkbnk/ --> | |
<div id="posts"> | |
<h2> Today's top ten news <small>from <a href = '//reddit.com/r/worldnews' target = '_blank'>/r/worldnews</a></small></h2> | |
<hr> | |
<ul class="list-unstyled"></ul> | |
</div> | |
<!-- JS --> | |
<script src="https://rawgit.com/sahilm/reddit.js/master/reddit.js"></script> | |
<script src="https://code.jquery.com/jquery-2.1.3.min.js"></script> |