Hamlet Batista hamletbatista

## faqpage_generated.js
<title>Google Ads to Start Hiding Some Search Query Data</title>
<script type="application/ld+json">
    {
      "@context": "https://schema.org",
      "@type": "FAQPage",
      "mainEntity": [


    {
        "@type": "Question",

## que_generator.py
class QueGenerator():
  """Hold two T5 models and their tokenizers, loaded from local directories.

  The ``que_*`` attributes come from paths under ``./t5_que_gen_model/`` and
  the ``ans_*`` attributes from paths under ``./t5_ans_gen_model/``
  (presumably question generation and answer generation -- confirm against
  the rest of the class, which is not visible in this excerpt).
  """

  def __init__(self):
    """Load both models and both tokenizers and choose a device string."""
    # Model weights are read from paths relative to the current working
    # directory, so the process must be started where these folders exist.
    self.que_model = T5ForConditionalGeneration.from_pretrained('./t5_que_gen_model/t5_base_que_gen/')
    self.ans_model = T5ForConditionalGeneration.from_pretrained('./t5_ans_gen_model/t5_base_ans_gen/')

    # Each model has its own tokenizer directory next to the model directory.
    self.que_tokenizer = T5Tokenizer.from_pretrained('./t5_que_gen_model/t5_base_tok_que_gen/')
    self.ans_tokenizer = T5Tokenizer.from_pretrained('./t5_ans_gen_model/t5_base_tok_ans_gen/')

    # Only the device name is recorded here; the visible code does not move
    # either model onto it.
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'


## keyworksFromUrl.py
from urllib.parse import urlparse
import re

# Break a product URL's path into its distinct "/"- and "-"-separated pieces.
url = "https://www.amazon.com/SanDisk-128GB-microSDXC-Memory-Adapter/dp/B073JYC4XM/"

# Only the path component is split; scheme and host are discarded by urlparse.
print(
    set(
        re.compile("[/-]").split(urlparse(url).path)
    )
)

# Example output (a set, so the ordering may differ between runs; the ''
# entry comes from the path's leading and trailing "/"):
# {'', 'B073JYC4XM', 'dp', '128GB', 'microSDXC', 'Memory', 'SanDisk', 'Adapter'}

## detectnet_to_automl.py
from glob import glob
import os
from collections import defaultdict
from pprint import pprint
import PIL

def detectnet_input():
    """Set up a list of strings to exclude.

    Only the opening lines of this function are visible in this excerpt; how
    ``exclude`` is consumed and what the function returns are not shown.
    """
    # Disabled image discovery, kept from the original:
    #images = glob(os.path.join(FLAGS.data_dir, '*.jpg'))
    # NOTE(review): these look like annotation label names to skip -- confirm
    # against the remainder of the function.
    exclude= ["page tabs", "original price", "product name0055-0922", "promotion text"]

## draw_bounding_boxes.py
import cv2

def draw_bounding_boxes(file_path, prediction_result):
  """Load the image at ``file_path`` and read its pixel dimensions.

  Args:
    file_path: Path of the image file to read from disk.
    prediction_result: Not referenced by the portion of the function
      visible in this excerpt.

  Raises:
    OSError: If OpenCV could not read or decode the file.
  """

  ## read image file from disk
  img = cv2.imread(file_path, cv2.IMREAD_COLOR)

  # cv2.imread reports a missing or undecodable file by returning None
  # instead of raising, so fail here with a clear message rather than with
  # an AttributeError on ``img.shape`` below.
  if img is None:
    raise OSError(f"cv2.imread could not read image: {file_path!r}")

  # shape is (rows, columns, channels); the first two are height and width.
  height, width = img.shape[:2]

## shopify_theme_link_spider.py
import scrapy
import pickle

class ShopifyThemeLinkSpider(scrapy.Spider):
    """Scrapy spider whose class body preloads ``theme_links`` from a pickle.

    Only the start of the class is visible in this excerpt; how
    ``theme_links`` is used by the spider is not shown.
    """

    # Name under which Scrapy registers this spider.
    name = 'shopifyspider'

    # This runs once, while the class body is executed (i.e. when the module
    # is imported), so 'theme_links.pkl' must already exist in the current
    # working directory at that point. Both ``f`` and ``theme_links`` end up
    # as class attributes.
    # NOTE(review): unpickling can execute arbitrary code -- only load a
    # file that was produced by a trusted script.
    with open('theme_links.pkl', 'rb') as f:
      theme_links = pickle.load(f)

## pickle_shopify_themes.py
# df should be a pandas DataFrame with the columns "link" and "theme"

import pickle

# Write the mapping to "theme_links.pkl"; pickle needs the file in binary mode.
with open("theme_links.pkl", "wb") as f:
  # Index the frame by "link" and save it as a dict of dicts:
  # {link: {column: value, ...}, ...}
  pickle.dump(df.set_index("link").to_dict(orient="index"), f)


## shopify_theme_spider.py
# Scrape Shopify themes using scrapy.org
#!pip install scrapy

#%%writefile shopify_theme_spider.py

import scrapy

class ShopifyThemeSpider(scrapy.Spider):
    """Scrapy spider that starts from the first page of the Shopify theme listing.

    Only the class attributes are visible in this excerpt; the parsing
    callbacks are not shown.
    """
    # Name under which Scrapy registers this spider.
    name = 'bshopifyspider'
    # URLs the crawl begins with: page 1 of the paginated theme listing.
    start_urls = ['https://themes.shopify.com/themes?page=1']

## summary_text_250.json
[{"summary_text": "Around 16 percent of educators leave the field every year. Teachers report that the biggest reasons they leave are lack of autonomy and voice, alongside issues of culture and discipline. Turnover is costly, shorting districts upwards of $2.2 billion a year, according to a report by the National Council of Teachers of English, the nation's largest teacher organization. The report also found that teacher retention improves outcomes for students, and conversely, that teacher turnover can harm student achievement. The study was published in the Journal of Education, published by Simon & Schuster, a division of Penguin Random House, on November 14. For more, visit www.simonandschuster.com. For confidential support, call the National Suicide Prevention Lifeline at 1-800-273-8255 or visit http://www.suicidepreventionlifeline.org/."}]

## backlink_text.txt
We know that teacher retention improves outcomes for students, and conversely, that teacher turnover can harm student achievement. Around 16 percent of educators leave the field every year, and we know that many teachers, like me, leave within the first five years. Teachers report that the biggest reasons they leave are lack of autonomy and voice, alongside issues of culture and, in particular, discipline. What's more, turnover is costly—shorting districts upwards of $2.2 billion a year.
	<title>Google Ads to Start Hiding Some Search Query Data</title>
	<script type="application/ld+json">
	{
	"@context": "https://schema.org",
	"@type": "FAQPage",
	"mainEntity": [


	{
	"@type": "Question",
	class QueGenerator():
	def __init__(self):
	self.que_model = T5ForConditionalGeneration.from_pretrained('./t5_que_gen_model/t5_base_que_gen/')
	self.ans_model = T5ForConditionalGeneration.from_pretrained('./t5_ans_gen_model/t5_base_ans_gen/')

	self.que_tokenizer = T5Tokenizer.from_pretrained('./t5_que_gen_model/t5_base_tok_que_gen/')
	self.ans_tokenizer = T5Tokenizer.from_pretrained('./t5_ans_gen_model/t5_base_tok_ans_gen/')

	self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
	from urllib.parse import urlparse
	import re

	url="https://www.amazon.com/SanDisk-128GB-microSDXC-Memory-Adapter/dp/B073JYC4XM/"

	print(set(re.split("[/-]", urlparse(url).path)))

	#output
	#{'', 'B073JYC4XM', 'dp', '128GB', 'microSDXC', 'Memory', 'SanDisk', 'Adapter'}
	from glob import glob
	import os
	from collections import defaultdict
	from pprint import pprint
	import PIL

	def detectnet_input():
	#images = glob(os.path.join(FLAGS.data_dir, '*.jpg'))
	exclude= ["page tabs", "original price", "product name0055-0922", "promotion text"]
	import cv2

	def draw_bounding_boxes(file_path, prediction_result):

	## read image file from disk
	img = cv2.imread(file_path, cv2.IMREAD_COLOR)

	height = img.shape[0] # Image height
	width = img.shape[1] # Image width
	import scrapy
	import pickle

	class ShopifyThemeLinkSpider(scrapy.Spider):

	name = 'shopifyspider'

	with open('theme_links.pkl', 'rb') as f:
	theme_links = pickle.load(f)
	#df should be pandas data frame with columns link and theme

	import pickle

	with open("theme_links.pkl", "wb") as f:
	#Saving data frame as list of dicts
	pickle.dump(df.set_index("link").to_dict(orient="index"), f)
	# Scrape Shopify themes using scrapy.org
	#!pip install scrapy

	#%%writefile shopify_theme_spider.py

	import scrapy

	class ShopifyThemeSpider(scrapy.Spider):
	name = 'bshopifyspider'
	start_urls = ['https://themes.shopify.com/themes?page=1']