Skip to content

Instantly share code, notes, and snippets.

View merfrei's full-sized avatar
🖥️
Coding

Emiliano M. Rudenick merfrei

🖥️
Coding
View GitHub Profile
@merfrei
merfrei / maersk_vessel_crawl.py
Last active November 8, 2021 15:47
MAERSK Vessel Crawl
"""
Scrape the full list of active vessels from MAERSK
"""
import asyncio
import re
import csv
from typing import IO
import requests
from w3lib.url import add_or_replace_parameter
@merfrei
merfrei / fix_json_unescaped_quotes.py
Last active December 2, 2020 02:05
Fix JSON unescaped quotes
def fix_json_unescaped_quotes(json_str):
"""For example:
"name":"Under Armour Women's On 3" Shorts"
=>
"name":"Under Armour Women's On 3\" Shorts"
"""
new_json_str = ''
is_open = False # When : is found in a declaration
is_started = False # When " is found after :
for j_ix, j_chr in enumerate(json_str):
@merfrei
merfrei / scrapy_puppeteer_api_blocking.py
Last active November 27, 2020 19:26
Scrapy - Puppeteer API Utils
# Custom blocking detection and retries with puppeteer API example
import logging
from functools import wraps
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Codes treated as "blocked" by this module's blocking detection.
# NOTE(review): presumably HTTP response status codes - confirm against the
# code that consumes this list.
BLOCKED_CODES = [403, 500, 502, 503, 504]
@merfrei
merfrei / check-envio.py
Last active December 19, 2015 01:29
Notificación para envíos de OCA en Python usando pynotify
#!/usr/bin/env python
import urllib2
import json
def check_envio(num_envio):
    """Fetch the last tracking data for a shipment from enviosoca.com.

    Args:
        num_envio: Tracking number; interpolated into the request's
            ``trackingNumber`` query parameter with ``%s``.

    Returns:
        The response body decoded from JSON.

    Raises:
        urllib2.URLError: If the HTTP request fails.
        ValueError: If the response body is not valid JSON.
    """
    url = 'http://www.enviosoca.com/Tracking/GetLastTrackingData/default.aspx?trackingNumber=%s' % (num_envio,)
    resp = urllib2.urlopen(url)
    try:
        json_data = resp.read()
    finally:
        # urllib2 responses are not context managers on Python 2, so close
        # explicitly to release the socket even when the read fails.
        resp.close()
    return json.loads(json_data)
# -*- coding: utf-8 -*-
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy.item import Item, Field
from scrapy.contrib.loader import XPathItemLoader
from scrapy.contrib.loader.processor import MapCompose, TakeFirst
from scrapy.contrib.exporter import CsvItemExporter
from scrapy.conf import settings