Skip to content

Instantly share code, notes, and snippets.


Hamlet Batista hamletbatista

View GitHub Profile
View faqpage_generated.js
<title>Google Ads to Start Hiding Some Search Query Data</title>
<script type="application/ld+json">
"@context": "",
"@type": "FAQPage",
"mainEntity": [
"@type": "Question",
class QueGenerator():
def __init__(self):
self.que_model = T5ForConditionalGeneration.from_pretrained('./t5_que_gen_model/t5_base_que_gen/')
self.ans_model = T5ForConditionalGeneration.from_pretrained('./t5_ans_gen_model/t5_base_ans_gen/')
self.que_tokenizer = T5Tokenizer.from_pretrained('./t5_que_gen_model/t5_base_tok_que_gen/')
self.ans_tokenizer = T5Tokenizer.from_pretrained('./t5_ans_gen_model/t5_base_tok_ans_gen/')
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
from urllib.parse import urlparse
import re
# Split the URL's path on "/" and "-" and print the distinct tokens.
# A path that begins with "/" splits to a leading empty string, which is
# presumably why '' appears in the sample output below.
print(set(re.compile("[/-]").split(urlparse(url).path)))
#{'', 'B073JYC4XM', 'dp', '128GB', 'microSDXC', 'Memory', 'SanDisk', 'Adapter'}
from glob import glob
import os
from collections import defaultdict
from pprint import pprint
import PIL
def detectnet_input():
#images = glob(os.path.join(FLAGS.data_dir, '*.jpg'))
exclude= ["page tabs", "original price", "product name0055-0922", "promotion text"]
import cv2
def draw_bounding_boxes(file_path, prediction_result):
## read image file from disk
img = cv2.imread(file_path, cv2.IMREAD_COLOR)
height = img.shape[0] # Image height
width = img.shape[1] # Image width
#df should be pandas data frame with columns link and theme
import pickle
with open("theme_links.pkl", "wb") as f:
    # orient="index" produces a dict of dicts keyed by the index, i.e.
    # {link: {column: value, ...}} once "link" has been made the index.
    rows_by_link = df.set_index("link").to_dict(orient="index")
    pickle.dump(rows_by_link, f)
# Scrape Shopify themes using Scrapy
#!pip install scrapy
import scrapy
class ShopifyThemeSpider(scrapy.Spider):
name = 'bshopifyspider'
start_urls = ['']
View summary_text_250.json
[{"summary_text": "Around 16 percent of educators leave the field every year. Teachers report that the biggest reasons they leave are lack of autonomy and voice, alongside issues of culture and discipline. Turnover is costly, shorting districts upwards of $2.2 billion a year, according to a report by the National Council of Teachers of English, the nation's largest teacher organization. The report also found that teacher retention improves outcomes for students, and conversely, that teacher turnover can harm student achievement. The study was published in the Journal of Education, published by Simon & Schuster, a division of Penguin Random House, on November 14. For more, visit For confidential support, call the National Suicide Prevention Lifeline at 1-800-273-8255 or visit"}]
View backlink_text.txt