Nathan Zimmerman moradology

## spark-config.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                moradology
                / spark-config.md
            
            
              Created
              February 29, 2024 15:36
            
          
    Spark Configuration Flags

Configurations


Default Parallelism

spark.default.parallelism
Suggested Value: 32 to 5000
Description: Sets the default level of parallelism. A value of 32 is a moderate level suitable for medium-sized clusters, but it is only a starting point: you can raise this number considerably, and the Spark documentation suggests 1 per CPU across all executors. This default will not prevent a higher level of parallelism for any APIs other than RDD. The right value may change according to expected job size, so it is perhaps wise to set it via heuristics evaluated in the infrastructure that kicks off EMR jobs.


Driver Extra Java Options


## summarize_with_embeddings.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import pathlib
import subprocess
import tempfile
import textwrap

from langchain.llms import OpenAI
from langchain.chains import LLMChain

## clipboard.scala
import java.awt.datatransfer.StringSelection;
import java.awt.Toolkit;
import java.awt.datatransfer.Clipboard;
// Copy the contents of `wkt` to the system clipboard.
// NOTE(review): `wkt` is not defined in this snippet; it must already be in scope.
// Wrap `wkt` in a StringSelection so it can be handed to the clipboard.
val stringSelection = new StringSelection(wkt);
// Obtain the system clipboard from the default AWT toolkit.
val clipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
// Put the selection on the clipboard; `null` is passed as the owner argument.
clipboard.setContents(stringSelection, null);

## abc123.json1
{
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "properties": {},
      "geometry": {
        "type": "Polygon",
        "coordinates": [
          [

## feature-design.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                moradology
                / feature-design.md
            
            
              Last active
              June 20, 2019 15:38
            
              
                Thoughts about `Feature`
              
          
    feature design

Before deciding what our support for GIS features should look like in GT,
we should get a sense of how the term "feature" is already used, to avoid
confusion. Unfortunately, there's no single source of truth for a question
like this; we'll have to settle for sampling from the important pieces of
discourse and evaluating our design in terms of them.
geojson spec

from https://tools.ietf.org/html/rfc7946#section-3.2

  
## serve-vt.py
#!/usr/bin/env python
# Import the HTTP server pieces under the same names on Python 3 and Python 2.
# The Python 3 modules are tried first; an ImportError falls back to the
# Python 2 module names.
try:
    # Python 3
    from http.server import HTTPServer, SimpleHTTPRequestHandler, test as test_orig
    import sys
    def test (*args):
        """Call ``http.server.test`` with a port taken from the command line.

        Positional arguments are forwarded unchanged to the original ``test``.
        The port is ``int(sys.argv[1])`` when a first command-line argument is
        present, otherwise 8000. A non-integer argument makes ``int()`` raise
        ``ValueError``.
        """
        test_orig(*args, port=int(sys.argv[1]) if len(sys.argv) > 1 else 8000)
except ImportError: # Python 2
    # No wrapper is defined on this path: `test` is used exactly as imported.
    from BaseHTTPServer import HTTPServer, test
    from SimpleHTTPServer import SimpleHTTPRequestHandler

## squares.geojson

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                moradology
                / squares.geojson
            
            
              Created
              March 1, 2019 20:36
            
              
                some fake geojson
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## before.js
(function(){

  var map = L.map('map', {
    center: [39.9522, -75.1639],
    zoom: 14
  });
  var Stamen_TonerLite = L.tileLayer('http://stamen-tiles-{s}.a.ssl.fastly.net/toner-lite/{z}/{x}/{y}.{ext}', {
    attribution: 'Map tiles by <a href="http://stamen.com">Stamen Design</a>, <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> &mdash; Map data &copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>',
    subdomains: 'abcd',
    minZoom: 0,

## user_stats.sql
WITH country_counts AS (
        SELECT cc.changeset_id,
           countries.name,
           cc.edit_count
          FROM (changesets_countries cc
            JOIN countries ON ((cc.country_id = countries.id)))
       ), chgset_country_counts AS (
        SELECT chg.user_id,
           country_counts.name,
           sum(country_counts.edit_count) AS edit_count

## whatever.json
{"paragraphs":[{"text":"import com.azavea._\n\nimport geotrellis.raster._\nimport geotrellis.spark._\nimport geotrellis.spark.io._\nimport geotrellis.spark.io.cog.COGLayerStorageMetadata\nimport geotrellis.spark.io.s3._\nimport geotrellis.spark.io.s3.cog._\nimport geotrellis.vector.Extent\nimport cats.effect.IO\nimport cats.implicits._\nimport com.amazonaws.services.s3.AmazonS3URI\nimport spire.syntax.cfor._\nimport org.apache.spark.SparkContext\nimport scala.util.Try\n\nimport java.util.concurrent.Executors\nimport scala.concurrent.ExecutionContext\n\n","user":"anonymous","dateUpdated":"2018-07-18T19:44:34+0000","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\nimport com.azavea._\n\nimport geotrellis.raster._\n\nimport geotrellis.spark._\n\nimport geotrellis.spark.io._\n\nimport geotrellis.spark.io.cog.COGLayerStorageMeta
	#!/usr/bin/env python
	# -- coding: utf-8 --
	import argparse
	import pathlib
	import subprocess
	import tempfile
	import textwrap

	from langchain.llms import OpenAI
	from langchain.chains import LLMChain
	import java.awt.datatransfer.StringSelection;
	import java.awt.Toolkit;
	import java.awt.datatransfer.Clipboard;
	val stringSelection = new StringSelection(wkt);
	val clipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
	clipboard.setContents(stringSelection, null);
	{
	"type": "FeatureCollection",
	"features": [
	{
	"type": "Feature",
	"properties": {},
	"geometry": {
	"type": "Polygon",
	"coordinates": [
	[
	#!/usr/bin/env python
	try:
	# Python 3
	from http.server import HTTPServer, SimpleHTTPRequestHandler, test as test_orig
	import sys
	def test (*args):
	test_orig(*args, port=int(sys.argv[1]) if len(sys.argv) > 1 else 8000)
	except ImportError: # Python 2
	from BaseHTTPServer import HTTPServer, test
	from SimpleHTTPServer import SimpleHTTPRequestHandler
	(function(){

	var map = L.map('map', {
	center: [39.9522, -75.1639],
	zoom: 14
	});
	var Stamen_TonerLite = L.tileLayer('http://stamen-tiles-{s}.a.ssl.fastly.net/toner-lite/{z}/{x}/{y}.{ext}', {
	attribution: 'Map tiles by <a href="http://stamen.com">Stamen Design</a>, <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> — Map data © <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>',
	subdomains: 'abcd',
	minZoom: 0,
	WITH country_counts AS (
	SELECT cc.changeset_id,
	countries.name,
	cc.edit_count
	FROM (changesets_countries cc
	JOIN countries ON ((cc.country_id = countries.id)))
	), chgset_country_counts AS (
	SELECT chg.user_id,
	country_counts.name,
	sum(country_counts.edit_count) AS edit_count