Skip to content

Instantly share code, notes, and snippets.

View OriHoch's full-sized avatar

Ori Hoch OriHoch

  • Israel
View GitHub Profile
Vagrant.configure("2") do |config|
config.vm.box = "ubuntu/trusty"
config.vm.box_url = "https://cloud-images.ubuntu.com/vagrant/trusty/current/trusty-server-cloudimg-amd64-vagrant-disk1.box"
config.vm.provider "virtualbox" do |v|
v.memory = 256
end
provision_script = <<-SCR
wget -O celerybeat https://raw.githubusercontent.com/celery/celery/master/extra/generic-init.d/celerybeat
cp celerybeat /etc/init.d/
@OriHoch
OriHoch / test_stream_remote_resources_txt.py
Last active April 12, 2017 08:14
Test case for the tabulator hack that parses text files as CSV
import tabulator
from functools import partial
def _test_from_stream(stream, expected_content):
try:
stream.open()
except tabulator.stream.exceptions.FormatError as e:
if str(e) == "Format has been detected as HTML (not supported)":
pass
else:
#!/usr/bin/env python
from pymongo import MongoClient
import os
import requests
client = MongoClient(os.environ.get("MONGO_HOST", "localhost"), int(os.environ.get("MONGO_PORT", "27017")))
db = client[os.environ["MONGO_DB"]]
photoUnits = db['photoUnits']
data = requests.get("https://raw.githubusercontent.com/Beit-Hatfutsot/dbs-bagnowka-scrape/master/bagnowka_all.json").json()
search-result-exemption .company-stamp {
line-height: 1.1em;
width: 114px;
height: 55px;
position: absolute;
top: 22px;
right: 241px;
background: url(assets/img/stamp-company.svg);
transform: rotate(0deg);
text-align: center;
@OriHoch
OriHoch / example.py
Created December 11, 2017 10:14
datapackage example for working with Knesset data
from datapackage import Package
from tabulator import Stream
package = Package('https://minio.oknesset.org/committees/datapackage.json')
print(package.resource_names)
protocols_parsed = package.get_resource('committee_meeting_protocols_parsed')
for protocol_num, protocol in enumerate(protocols_parsed.iter(keyed=True)):
print(protocol)
with Stream("https://minio.oknesset.org/committees/" + protocol["parts_object_name"], headers=1) as stream:
for part_num, part in enumerate(stream.iter(keyed=True)):
print(part)
{
"translatorID": "dcf19e16-0b1e-11e8-bed0-e4a4719186ba",
"translatorType": 1,
"label": "Migdar",
"creator": "Ori Hoch",
"target": "migdar",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": false,
@OriHoch
OriHoch / pipeline-spec.yaml
Last active March 4, 2018 14:05
resources one to many.
build_positions:
pipeline:
- run: load_resource
parameters:
url: data/datapackage.json
resource: input_resource
- run: split_resource
- run: dump.to_path
parameters:
out-path: data/splitted_resource
@OriHoch
OriHoch / install.sh
Created February 28, 2018 11:00
Getting data from the union list of Israel Z39.50 servers
#!/usr/bin/env bash
wget https://pypi.python.org/packages/6a/34/8176b841926a2add20524a9f74c307ac5fe6e33e9f4af12a58e6f7223982/mollyZ3950-2.04-molly1.tar.gz#md5=a0e5d7bb395ae31026afc7f974711630
sudo pip2 install ./mollyZ3950-2.04-molly1.tar.gz
sudo pip2 install pymarc
@OriHoch
OriHoch / install.sh
Last active July 15, 2018 06:02
downloading committee protocol parts
pip3 install -U datapackage-pipelines
@OriHoch
OriHoch / checkpoint from old datapackage with non-standard date time.ipynb
Last active October 21, 2018 15:36
dataflows bug - loading and checkpointing from package with non-standard date/time format
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.