Skip to content

Instantly share code, notes, and snippets.

View geobabbler's full-sized avatar
💭
Working

William Dollins geobabbler

💭
Working
View GitHub Profile
@geobabbler
geobabbler / scrape_photos.py
Created July 19, 2024 11:01
Script to download sample images from Wikimedia Commons
import requests
from bs4 import BeautifulSoup
import os
qry = "glacier" #wikimedia commons query
# Define the search URL
search_url = f"https://commons.wikimedia.org/w/index.php?search={qry}&title=Special:MediaSearch&go=Go&type=image"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
@geobabbler
geobabbler / ddg_rag.py
Last active August 27, 2024 16:04
Simple RAG is ChatGPT and DuckDuckGo
import requests
import openai
import urllib
import json
from duckduckgo_search import DDGS
# Set your OpenAI API key here
openai.api_key = "your-openai-api-key"
def search_duckduckgo(query):
@geobabbler
geobabbler / image_embeddings_ddl.sql
Created July 19, 2024 16:18
DDL for image embeddings table
-- public.image_embeddings definition
-- Drop table
-- DROP TABLE public.image_embeddings;
CREATE TABLE public.image_embeddings (
id serial4 NOT NULL,
image_path text NULL,
embedding public.vector NULL,
@geobabbler
geobabbler / resnet_query.py
Created July 19, 2024 11:09
Script to perform image similarity search using pgvector
import os
import psycopg2
import numpy as np
from PIL import Image
from scipy.spatial.distance import cosine
import torch
from torchvision import models, transforms
# Database configuration
DB_NAME = "database"
@geobabbler
geobabbler / resnet_embed.py
Created July 19, 2024 11:05
Script to read folder of images, generate embeddings, and write them to pgvector.
import os
import psycopg2
from PIL import Image
import torch
from torchvision import models, transforms
# Database configuration
DB_NAME = "database"
DB_USER = "user"
DB_PASSWORD = "password"
@geobabbler
geobabbler / wp2pdf.py
Created March 26, 2024 11:46
Simple script to dump Wordpress posts to individual PDF files.
import pdfkit
import requests
'''
The current theme will be applied to outputs, so it is recommended to switch to a simple theme before exporting.
'''
#Generate PDF from individual post URL
def url_to_pdf(url, output_filename):
try:
@geobabbler
geobabbler / MBTilesProvider.cs
Created February 25, 2014 17:13
Simple class to access MBTiles in C#
public class MBTilesProvider
{
public GeoExtent Bounds { get; private set; }
public CoordinatePair Center { get; private set; }
public int MinZoom { get; private set; }
public int MaxZoom { get; private set; }
public string Name { get; private set; }
public string Description { get; private set; }
public string MBTilesVersion { get; private set; }
public string Path { get; private set; }
@geobabbler
geobabbler / PostgisDialectExtensions.java
Last active August 13, 2021 04:48
Simple extension to PostGIS dialect for Hibernate
package com.geomusings.dialect;
//import org.hibernate.dialect.PostgreSQLDialect;
import org.hibernate.dialect.function.StandardSQLFunction;
import org.hibernate.type.CustomType;
import org.hibernate.type.StandardBasicTypes;
import org.hibernate.type.Type;
import org.hibernate.spatial.dialect.postgis.PostgisDialect;
import org.hibernate.usertype.UserType;
import org.hibernate.spatial.SpatialAggregate;
@geobabbler
geobabbler / vw_stops.sql
Created March 11, 2021 12:01
SQL Snippet for 2020-03-11 post on geoMusings
SELECT
ARRAY_TO_STRING(ARRAY_AGG(CONCAT(st_x(q.loc),',',st_y(q.loc))), ';') AS coords
FROM (
WITH
DATA AS (
SELECT
loc,
resource_id,
wkt,
capture_date,
@geobabbler
geobabbler / bigquery_sample_20210222.sql
Created February 22, 2021 18:37
SQL sample for blog post - 22 Feb 2021
WITH data AS (
SELECT name, loc, resource_id, wkt, capture_date, (LAG(capture_date) OVER (PARTITION BY resource_id ORDER BY capture_date ASC)) AS prev_date,
(LAG(wkt) OVER (PARTITION BY resource_id ORDER BY capture_date ASC)) AS prev_loc
FROM
(SELECT name, resource_id, loc, st_astext(loc) as wkt, capture_date FROM `my_project.my_dataset.geo_sample`
order by capture_date desc) q
ORDER BY capture_date
)
SELECT name, resource_id, loc, capture_date FROM data
WHERE