Skip to content

Instantly share code, notes, and snippets.

@wvengen
wvengen / scrapy_h2_proxy.py
Last active October 26, 2023 16:17
Scrapy download handler for HTTP/2 over an HTTP/1 proxy (TLS only).
"""
Download handler for HTTP/2 supporting proxy CONNECT over HTTP/1.
You can use this by configuring it in the spider settings:
DOWNLOAD_HANDLERS = {
'https': 'scrapy_h2_proxy.H2DownloadHandler',
}
Tested with Scrapy 2.5.0.
@gxercavins
gxercavins / schema-in-side-input.py
Created December 27, 2019 18:55
Stack Overflow question 59458599
import argparse, json, logging
import apache_beam as beam
import apache_beam.pvalue as pvalue
from apache_beam.io import ReadFromText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
class EnrichElementsFn(beam.DoFn):
@gxercavins
gxercavins / one_window_one_file.py
Last active December 1, 2020 23:22
Stack Overflow question 56234318
import argparse, json, logging, time
import apache_beam as beam
import apache_beam.transforms.window as window
from apache_beam.io import filesystems
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
class AddWindowingInfoFn(beam.DoFn):