Koba Khitalishvili KobaKhit

## sql-spines.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                KobaKhit
                / sql-spines.md
            
            
              Created
              February 8, 2024 02:58
            
              
                Examples of generating spines/dates in SQL. Assisted by Caleb Kassa.
              
          
    Spines in SQL

Given a starting date of 2024-02-01, I would like to generate the next 7 days, up to February 8th (2024-02-08), e.g.


dt


2024-02-01


2024-02-02


2024-02-03


2024-02-04


## repartition_pyspark_dataframe.py
from pyspark.sql.functions import monotonically_increasing_id, row_number
from pyspark.sql import Window
from functools import reduce

def partitionIt(size, num):
    '''
    Create a list of partition indices each of size num where number of groups is ceiling(len(seq)/num)

    Args:
        size (int): number of rows/elemets

## tableau_server_export.py
import tableauserverclient as TSC
import pandas as pd
from io import StringIO

class Tableau_Server(object):

    """docstring for ClassName"""
    def __init__(self,username, password,site_id,url, https = False):
        super().__init__() # http://stackoverflow.com/questions/576169/understanding-python-super-with-init-methods

## visualforce_embed_with_user.html
<apex:page >
<html>
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js"></script>

  <!-- User Id in a span -->
  <span id = 'user' style = 'display: none;'>
    <apex:outputText label="Account Owner" value="{!$User.Id}"></apex:outputText>
  </span>

  <!-- Embed placeholder -->

## reddit_posts_and_comments.py
class Reddit():
    def __init__(self,client_id, client_secret,user_agent='My agent'):
        self.reddit = praw.Reddit(client_id=client_id,
                                  client_secret=client_secret,
                                  user_agent=user_agent)

    def get_comments(self, submission):
        # get comments information using the Post as a starting comment
        comments = [RedditComment(author=submission.author,
                                 commentid = submission.postid,

## unnest_byseat.R
library(tidyr)

setwd("~/Desktop/unnest")

# Base name of the input file WITHOUT its extension: ".csv" is appended by
# paste0() below. (Keeping ".csv" here would make the script look for
# "file-name.csv.csv".)
fname = "file-name"
df = read.csv(paste0(fname,'.csv'), stringsAsFactors = F)

df$seats =
sapply(1:nrow(df), function(x) {
 seats =  c(df[x,]$first_seat,df[x,]$last_seat)

## stubhub_inventory_v2.py
import requests
import base64
import pprint

import pandas as pd
import json
from tqdm import tqdm

# https://stubhubapi.zendesk.com/hc/en-us/articles/220922687-Inventory-Search

## hmtl_table_parser.py
# http://srome.github.io/Parsing-HTML-Tables-in-Python-with-BeautifulSoup-and-pandas/
class HTMLTableParser:
    """Utilities used when parsing HTML tables."""

    @staticmethod
    def get_element(node):
        """Return a CSS selector fragment locating *node* among its siblings.

        Args:
            node: A parsed element exposing ``name`` and ``previous_siblings``
                (presumably a BeautifulSoup ``Tag`` -- confirm against callers).

        Returns:
            ``"<name>:nth-child(<k>)"`` when *node* is the k-th element child
            of its parent with k > 1, otherwise just ``node.name``.
        """
        # CSS ``:nth-child`` counts element siblings only, so siblings that
        # carry no tag name (text/whitespace nodes, comments) are skipped.
        position = 1 + sum(
            1
            for sibling in node.previous_siblings
            if getattr(sibling, "name", None) is not None
        )
        if position > 1:
            return '%s:nth-child(%s)' % (node.name, position)
        return node.name

## Large dataframe to csv in chunks in R
# Load the data frame that will be written out in chunks
df = read.csv("your-df.csv")

# Number of items in each chunk
elements_per_chunk = 100000

# List of vectors [1] 1:100000, [2] 100001:200000, ...
# seq_len() is used instead of 1:nrow(df) because 1:0 evaluates to c(1, 0);
# with seq_len() a zero-row data frame simply produces an empty list.
l = split(seq_len(nrow(df)), ceiling(seq_len(nrow(df))/elements_per_chunk))

# Write large data frame to csv in chunks
fname = "inventory-cleaned.csv"

## reddit-posts.html
<!-- Produces a responsive list of top ten posts from a subreddit /worldnews. Working jsfiddle http://jsfiddle.net/KobaKhit/t42zkbnk/ -->
<!-- Container that the script fills with the subreddit's top posts -->
<div id="posts">
    <!-- rel="noopener noreferrer" keeps the new tab from reaching back via window.opener;
         an explicit https:// scheme also works when the page is opened from file:// -->
    <h2> Today's top ten news <small>from <a href = 'https://reddit.com/r/worldnews' target = '_blank' rel = 'noopener noreferrer'>/r/worldnews</a></small></h2>
    <hr>
    <ul class="list-unstyled"></ul>
</div>

<!-- JS -->
<script src="https://rawgit.com/sahilm/reddit.js/master/reddit.js"></script>
<script src="https://code.jquery.com/jquery-2.1.3.min.js"></script>
	from pyspark.sql.functions import monotonically_increasing_id, row_number
	from pyspark.sql import Window
	from functools import reduce

	def partitionIt(size, num):
	'''
	Create a list of partition indices each of size num where number of groups is ceiling(len(seq)/num)

	Args:
	size (int): number of rows/elemets
	import tableauserverclient as TSC
	import pandas as pd
	from io import StringIO

	class Tableau_Server(object):

	"""docstring for ClassName"""
	def __init__(self,username, password,site_id,url, https = False):
	super().__init__() # http://stackoverflow.com/questions/576169/understanding-python-super-with-init-methods
	<apex:page >
	<html>
	<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js"></script>

	<!-- User Id in a span -->
	<span id = 'user' style = 'display: none;'>
	<apex:outputText label="Account Owner" value="{!$User.Id}"></apex:outputText>
	</span>

	<!-- Embed placeholder -->
	class Reddit():
	def __init__(self,client_id, client_secret,user_agent='My agent'):
	self.reddit = praw.Reddit(client_id=client_id,
	client_secret=client_secret,
	user_agent=user_agent)

	def get_comments(self, submission):
	# get comments information using the Post as a starting comment
	comments = [RedditComment(author=submission.author,
	commentid = submission.postid,
	library(tidyr)

	setwd("~/Desktop/unnest")

	fname = "file-name.csv"
	df = read.csv(paste0(fname,'.csv'), stringsAsFactors = F)

	df$seats =
	sapply(1:nrow(df), function(x) {
	seats = c(df[x,]$first_seat,df[x,]$last_seat)
	import requests
	import base64
	import pprint

	import pandas as pd
	import json
	from tqdm import tqdm

	# https://stubhubapi.zendesk.com/hc/en-us/articles/220922687-Inventory-Search
	# http://srome.github.io/Parsing-HTML-Tables-in-Python-with-BeautifulSoup-and-pandas/
	class HTMLTableParser:
	@staticmethod
	def get_element(node):
	# for XPATH we have to count only for nodes with same type!
	length = len(list(node.previous_siblings)) + 1
	if (length) > 1:
	return '%s:nth-child(%s)' % (node.name, length)
	else:
	return node.name
	df = read.csv("your-df.csv")

	# Number of items in each chunk
	elements_per_chunk = 100000

	# List of vectors [1] 1:100000, [2] 100001:200000, ...
	l = split(1:nrow(df), ceiling(seq_along(1:nrow(df))/elements_per_chunk))

	# Write large data frame to csv in chunks
	fname = "inventory-cleaned.csv"
	<!-- Produces a responsive list of top ten posts from a subreddit /worldnews. Working jsfiddle http://jsfiddle.net/KobaKhit/t42zkbnk/ -->
	<div id="posts">
	<h2> Today's top ten news <small>from <a href = '//reddit.com/r/worldnews' target = '_blank'>/r/worldnews</a></small></h2>
	<hr>
	<ul class="list-unstyled"></ul>
	</div>

	<!-- JS -->
	<script src="https://rawgit.com/sahilm/reddit.js/master/reddit.js"></script>
	<script src="https://code.jquery.com/jquery-2.1.3.min.js"></script>