This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Thinking token levels | |
| const THINK_LEVEL = { | |
| HIGHEST: 31999, | |
| MIDDLE: 10000, | |
| BASIC: 4000, | |
| NONE: 0, | |
| }; | |
| // Language cues for thinking intensity | |
| const THINK_CUES = { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import base64 | |
| import json | |
| import requests | |
| from pathlib import Path | |
| from typing import Union, Optional | |
| class RemarkableError(Exception): | |
| pass | |
| def upload_to_remarkable( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| tag: | |
| @if [ $$(git rev-list $$(git describe --abbrev=0 --tags)..HEAD --count) -gt 0 ]; then \ | |
| if [ $$(git log -n 1 --oneline $$(git describe --abbrev=0 --tags)..HEAD CHANGELOG.md | wc -l) -gt 0 ]; then \ | |
| git tag $$(python setup.py --version) && git push --tags || echo 'Version already released, update your version!' | |
| else \ | |
| echo "CHANGELOG not updated since last release!"; \ | |
| exit 1; \ | |
| fi; \ | |
| else \ | |
| echo "No commits since last release!"; \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| from __future__ import unicode_literals | |
| import base64 | |
| import json | |
| import logging | |
| from urlparse import parse_qs | |
| import requests |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from urllib.parse import urlparse | |
| from pyspark.sql.functions import desc, asc | |
| from pyspark.sql.types import ( | |
| StructType, | |
| StructField, | |
| StringType, | |
| LongType, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import pyspark.sql.types as T | |
| from math import ceil | |
| def repartition_for_writing(df): | |
| count = df.count() | |
| sampled_df = get_sampled_df(df, count=count) | |
| string_column_sizes = get_string_column_sizes(sampled_df) | |
| num_files = get_num_files(count, df.schema, string_column_sizes) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import datetime | |
| import json | |
| BUCKET_NAME = "<s3_bucket_name>" | |
| INVENTORY_PREFIX = "<prefix_given_to_s3_inventory>" # Should have data/, hive/, and some dated folders inside of it | |
| ACCOUNT_CUID = "<your_canonical_user_id_for_cross_account>" # Account which is not the owner of S3 bucket, but trying to access it. Controls ROLE_ARN | |
| ROLE_ARN = "<role_in_cross_account_that_can_assume_to_main_account>" | |
| def role_arn_to_session(role_arn): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys, time, subprocess, socket, telnetlib | |
| from datetime import datetime | |
| from collections import defaultdict | |
| from boto.ec2.cloudwatch import CloudWatchConnection | |
| MAPPINGS = { | |
| # Memcached name: (AWS Name, AWS Metric Type, Calculation Method) | |
| 'uptime': ('Uptime', 'Count', 'gauge'), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| my_name_function() { | |
| # There aren't many use cases where we would really want to do things this | |
| # way instead of just using a global. Unless we are writing libraries in bash (the horror) | |
| # or have an extremely large script where we are not sure we won't be clobbering | |
| # variable names (equally terrifying) | |
| local __assign_my_results_to_this_variable=$1 | |
| local do_some_work=$(echo $ALL_MY_COMMANDS_NAMES | grep -v "bad commands") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_files_per_partition(df, partition_key, file_type="parquet", compression="snappy", byte_array_size=256): | |
| rows = df.count() | |
| print "Dataset has {} rows".format(rows) | |
| schema = df.schema | |
| num_partitions = 1 | |
| if partition_key is not None: | |
| num_partitions = df.select([partition_key]).distinct().count() | |
| print "Dataset has {} distinct partition keys".format(num_partitions) | |
| _df = df.drop(partition_key) | |
| schema = _df.schema |
NewerOlder