Skip to content

Instantly share code, notes, and snippets.

View ahmedbesbes's full-sized avatar
💭
Building things, one line of code at a time 💻

Ahmed BESBES ahmedbesbes

💭
Building things, one line of code at a time 💻
View GitHub Profile
@ahmedbesbes
ahmedbesbes / news.py
Last active March 30, 2018 13:38
Script to fetch news sources and articles from the newsapi.org API.
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
from tqdm import tqdm
from functools import reduce
def getSources():
    """Fetch the list of English-language news sources from the News API.

    Returns:
        list: the ``'sources'`` entries from the decoded JSON response,
        or an empty list when the key is absent.
    """
    source_url = 'https://newsapi.org/v1/sources?language=en'
    # .json() decodes the HTTP response body into a dict.
    response = requests.get(source_url).json()
    # The captured original computed the response and discarded it;
    # return the payload so callers can actually use the sources.
    return response.get('sources', [])
import torch
import torch.nn as nn
from torchvision import models
class MRNet(nn.Module):
    """MRNet-style classifier built on a pretrained AlexNet backbone.

    NOTE(review): no ``forward`` method is visible in this capture — the
    class appears truncated; the pooling layer and linear head below are
    presumably wired together there. Confirm against the original gist.
    """
    def __init__(self):
        super().__init__()
        # Downloads/loads ImageNet-pretrained AlexNet weights on first use.
        self.pretrained_model = models.alexnet(pretrained=True)
        # Global average pool: collapses each feature map to 1x1.
        self.pooling_layer = nn.AdaptiveAvgPool2d(1)
        # Single-logit head; in_features=256 presumably matches the channel
        # count of the AlexNet feature extractor — confirm in forward().
        # NOTE(review): attribute name 'classifer' is a typo for 'classifier',
        # but it is part of the public interface (state_dict keys, external
        # references), so it is deliberately left unchanged.
        self.classifer = nn.Linear(256, 1)
class KneePlot():
    """Holds per-case plane/slice bookkeeping for MRI exam visualization.

    Args:
        cases: iterable of case identifiers.
        figsize: figure size, stored by convention for later plotting
            (not used in the visible portion of this class).
    """
    def __init__(self, cases, figsize=(15, 5)):
        self.cases = cases
        # Every case has the same three acquisition planes.
        self.planes = {case: ['coronal', 'sagittal', 'axial'] for case in self.cases}
        self.slice_nums = {}
        for case in self.cases:
            self.slice_nums[case] = {}
            for plane in ['coronal', 'sagittal', 'axial']:
                # NOTE(review): the loop body was lost in this capture —
                # it presumably records the slice count per plane
                # (e.g. via load_stacks). `pass` restores syntactic
                # validity; restore the real body from the original gist.
                pass
# Root directory of the training split; each plane ('coronal', 'sagittal',
# 'axial') is expected as a subdirectory holding one .npy stack per case
# (see the path layout in load_one_stack below).
train_path = '../data/train/'
def load_one_stack(case, data_path=train_path, plane='coronal'):
    """Load the slice stack stored at ``<data_path>/<plane>/<case>.npy``.

    Returns the numpy array saved for this exam *case* in the given *plane*.
    """
    stack_file = f'{data_path}/{plane}/{case}.npy'
    return np.load(stack_file)
def load_stacks(case, data_path=train_path):
    """Load the stacks for all three planes of one exam *case*.

    NOTE(review): the body is truncated in this capture — it presumably
    fills ``x[plane] = load_one_stack(case, data_path, plane)`` for each
    plane and returns ``x``; restore from the original gist. As captured,
    the function returns None.
    """
    x = {}
    planes = ['coronal', 'sagittal', 'axial']
import json
import time
from bs4 import BeautifulSoup
import requests
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
# Crawl the site's category index and build a nested mapping:
#   data[category_name][sub_category_name] -> (presumably) the sub-category URL.
# NOTE(review): `get_soup` and `base_url` are defined elsewhere in the file.
data = {}
soup = get_soup(base_url + '/categories')
for category in soup.findAll('div', {'class': 'category-object'}):
    # Human-readable category name from the section header.
    name = category.find('h3', {'class': 'sub-category__header'}).text
    name = name.strip()
    data[name] = {}
    sub_categories = category.find('div', {'class': 'sub-category-list'})
    for sub_category in sub_categories.findAll('div', {'class': 'child-category'}):
        sub_category_name = sub_category.find('a', {'class': 'sub-category-item'}).text
        # NOTE(review): loop body appears truncated here — the statement
        # storing sub_category_name (and its link) into data[name] is
        # missing from this capture.
def extract_company_urls_form_page():
    """Collect the company-profile URLs on the current results page.

    Reads the module-level Selenium ``driver``. Returns the hrefs of all
    business-card anchors, de-duplicated. The original ``list(set(urls))``
    returned the URLs in arbitrary order; ``dict.fromkeys`` keeps the
    first-seen (page) order while still removing duplicates, making the
    crawl deterministic.
    """
    a_list = driver.find_elements_by_xpath('//a[@class="category-business-card card"]')
    urls = [a.get_attribute('href') for a in a_list]
    # dict preserves insertion order (Python 3.7+), so this is an
    # order-stable de-duplication.
    return list(dict.fromkeys(urls))
def go_next_page():
    """Probe the current page for a pagination 'next' link.

    Returns:
        tuple: ``(True, element)`` when the next-page button is present,
        ``(False, None)`` otherwise. Relies on the module-level Selenium
        ``driver`` and the ``NoSuchElementException`` imported at the top
        of the file.
    """
    try:
        next_button = driver.find_element_by_xpath(
            '//a[@class="button button--primary next-page"]'
        )
    except NoSuchElementException:
        # No button on the last results page — signal the caller to stop.
        return False, None
    return True, next_button
# Configure a headless Chrome session for scraping.
# NOTE(review): `Options` comes from selenium's Chrome options module,
# imported elsewhere in the file.
options = Options()
options.add_argument('--headless')  # run without a visible browser window
options.add_argument('--no-sandbox')  # commonly required in container/CI environments
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
options.add_argument("--disable-extensions")
# Skip image downloads to speed up page loads — presumably value 2 means
# "block" in Chrome's managed content settings; confirm against Chrome docs.
prefs = {"profile.managed_default_content_settings.images": 2}
options.add_experimental_option("prefs", prefs)
company_urls = {}
for category in tqdm_notebook(data):
for sub_category in tqdm_notebook(data[category], leave=False):
company_urls[sub_category] = []
url = base_url + data[category][sub_category] + "?numberofreviews=0&timeperiod=0&status=all"
driver.get(url)
try:
element_present = EC.presence_of_element_located(
(By.CLASS_NAME, 'category-business-card card'))