Skip to content

Instantly share code, notes, and snippets.

Avatar

Hamlet Batista hamletbatista

View GitHub Profile
View faqpage_generated.js
<title>Google Ads to Start Hiding Some Search Query Data</title>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "FAQPage",
"mainEntity": [
{
"@type": "Question",
View que_generator.py
class QueGenerator():
def __init__(self):
self.que_model = T5ForConditionalGeneration.from_pretrained('./t5_que_gen_model/t5_base_que_gen/')
self.ans_model = T5ForConditionalGeneration.from_pretrained('./t5_ans_gen_model/t5_base_ans_gen/')
self.que_tokenizer = T5Tokenizer.from_pretrained('./t5_que_gen_model/t5_base_tok_que_gen/')
self.ans_tokenizer = T5Tokenizer.from_pretrained('./t5_ans_gen_model/t5_base_tok_ans_gen/')
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
View keyworksFromUrl.py
from urllib.parse import urlparse
import re
# Extract keyword candidates from the path component of a product URL.
url = "https://www.amazon.com/SanDisk-128GB-microSDXC-Memory-Adapter/dp/B073JYC4XM/"
# Split the path on "/" and "-". The leading and trailing slashes produce
# empty strings, which are not keywords, so they are filtered out.
keywords = {token for token in re.split(r"[/-]", urlparse(url).path) if token}
print(keywords)
# output (set ordering may vary between runs)
# {'B073JYC4XM', 'dp', '128GB', 'microSDXC', 'Memory', 'SanDisk', 'Adapter'}
View detectnet_to_automl.py
from glob import glob
import os
from collections import defaultdict
from pprint import pprint
import PIL
def detectnet_input():
#images = glob(os.path.join(FLAGS.data_dir, '*.jpg'))
exclude= ["page tabs", "original price", "product name0055-0922", "promotion text"]
View draw_bounding_boxes.py
import cv2
def draw_bounding_boxes(file_path, prediction_result):
## read image file from disk
img = cv2.imread(file_path, cv2.IMREAD_COLOR)
height = img.shape[0] # Image height
width = img.shape[1] # Image width
View shopify_theme_link_spider.py
View pickle_shopify_themes.py
# df should be a pandas DataFrame with the columns "link" and "theme"
import pickle
# Persist the link -> theme mapping to disk; `df` must already be defined.
with open("theme_links.pkl", "wb") as f:
    # Save the data frame as a dict of dicts keyed by link, i.e.
    # {link: {column: value, ...}, ...} -- not a list of dicts.
    # NOTE: to_dict(orient="index") requires the "link" values to be unique.
    pickle.dump(df.set_index("link").to_dict(orient="index"), f)
View shopify_theme_spider.py
# Scrape Shopify themes using scrapy.org
#!pip install scrapy
#%%writefile shopify_theme_spider.py
import scrapy
class ShopifyThemeSpider(scrapy.Spider):
name = 'bshopifyspider'
start_urls = ['https://themes.shopify.com/themes?page=1']
View summary_text_250.json
[{"summary_text": "Around 16 percent of educators leave the field every year. Teachers report that the biggest reasons they leave are lack of autonomy and voice, alongside issues of culture and discipline. Turnover is costly, shorting districts upwards of $2.2 billion a year, according to a report by the National Council of Teachers of English, the nation's largest teacher organization. The report also found that teacher retention improves outcomes for students, and conversely, that teacher turnover can harm student achievement. The study was published in the Journal of Education, published by Simon & Schuster, a division of Penguin Random House, on November 14. For more, visit www.simonandschuster.com. For confidential support, call the National Suicide Prevention Lifeline at 1-800-273-8255 or visit http://www.suicidepreventionlifeline.org/."}]
View backlink_text.txt