Gaurav Chauhan (2796gaurav)
#standardSQL
CREATE MODEL `bqml_tutorial.natality_model`  # model name
OPTIONS
  (model_type='linear_reg',  # one of 'LINEAR_REG' | 'LOGISTIC_REG' | 'KMEANS' | 'TENSORFLOW'
   input_label_cols=['weight_pounds']) AS  # the column the model learns to predict
SELECT  # input feature columns plus the label
  weight_pounds,
  is_male,
  gestation_weeks,
  mother_age
FROM
  # the preview cuts off here; the public natality sample table from the
  # BigQuery ML tutorial is assumed as the source
  `bigquery-public-data.samples.natality`
WHERE
  weight_pounds IS NOT NULL
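Once CREATE MODEL finishes, the trained model can also be checked from Python. A minimal sketch, assuming the google-cloud-bigquery package is installed and default credentials point at the project that owns the bqml_tutorial dataset:

# minimal sketch, assuming google-cloud-bigquery and configured credentials
from google.cloud import bigquery

client = bigquery.Client()

# ML.EVALUATE reports regression metrics such as mean_absolute_error
eval_sql = """
SELECT * FROM ML.EVALUATE(MODEL `bqml_tutorial.natality_model`)
"""
for row in client.query(eval_sql).result():
    print(dict(row))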
# get our tweets into a dataframe
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer  # assumes the vaderSentiment package

tweet_data = pd.DataFrame(data=tweet_text, columns=['data'])
# create analyzer object
analyzer = SentimentIntensityAnalyzer()
# as before, accumulate a sentiment score per unique tweet
final_scores = 0
for tweet in list(set(tweet_text)):
    # the preview cuts off here; summing VADER's compound score
    # (a normalized value in [-1, 1]) is the natural completion
    final_scores += analyzer.polarity_scores(tweet)['compound']
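The running total only becomes a comparable signal once it is averaged over the tweets scored. A short hedged continuation; the averaging step is not in the preview:

# hypothetical continuation: average the accumulated compound scores so the
# result does not depend on how many unique tweets were fetched
unique_tweets = list(set(tweet_text))
final_sentiment = final_scores / len(unique_tweets) if unique_tweets else 0.0
print('average tweet sentiment: {:.3f}'.format(final_sentiment))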
# Authorization and API setup
import tweepy

auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
auth.set_access_token('ACCESS_KEY', 'ACCESS_KEY_SECRET')
api = tweepy.API(auth)
# Get tweets using the Tweepy search API
max_tweets = 1000
name = 'WIPRO'  # the query we want to search for on Twitter
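The preview stops before the search call itself. A hedged sketch of collecting the tweet text with tweepy's Cursor, assuming the pre-v2 standard search endpoint that this era of tweepy exposed:

# hypothetical completion: page through search results and keep the text;
# api.search was the tweepy 3.x name (renamed api.search_tweets in v4)
tweet_text = []
for tweet in tweepy.Cursor(api.search, q=name, lang='en').items(max_tweets):
    tweet_text.append(tweet.text)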
import quandl  # assumes the quandl package

quandl.ApiConfig.api_key = 'WRITE YOUR ACCOUNT API KEY'
# the dates below match when this code was written; adjust them to your needs.
# the news link we defined initially returned roughly the past six months of
# stories, so the six months between start_date and end_date mirror that window
stock_data = quandl.get(stock_name, start_date='2018-03-01', end_date='2018-08-31')
stock_data.reset_index(level=0, inplace=True)
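The price frame can be sanity-checked with a quick chart before mixing in sentiment. A minimal sketch, assuming a recent plotly is installed, that 'Date' comes from the reset index, and that 'Close' is present as in Quandl's NSE datasets:

# minimal sketch, assuming plotly and the 'Date'/'Close' columns noted above
import plotly.graph_objs as go

fig = go.Figure(data=[go.Scatter(x=stock_data['Date'], y=stock_data['Close'], mode='lines')])
fig.update_layout(title='NSE/WIPRO close, Mar-Aug 2018', xaxis_title='Date', yaxis_title='Close')
fig.show()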
# to visualise with plotly we will divide our dataset into positive and negative
# using VADER for the sentiment analysis
# running score, defined as 0 initially
sent = 0
sent_val = []
analyzer = SentimentIntensityAnalyzer()
# loop over each title and score it
for title in data_nw['title']:
    # the preview cuts off here; scoring each title and keeping the value
    # is the natural completion
    sent = analyzer.polarity_scores(title)['compound']
    sent_val.append(sent)
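The comment above promises a positive/negative split for the plotly view. A hedged sketch of that split, assuming sent_val ends up holding one compound score per title:

# hypothetical continuation: attach the scores and split the frame for plotting
data_nw['sentiment'] = sent_val
positive_news = data_nw[data_nw['sentiment'] > 0]
negative_news = data_nw[data_nw['sentiment'] <= 0]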
import requests
from bs4 import BeautifulSoup

news_title = []  # collects the news titles
news_date = []   # collects the date each story was published
# loop over each unique article link and pull out the title
for results_url in unique_links:
    results = requests.get(results_url)
    results_text = BeautifulSoup(results.text, 'html.parser')
    if results_text.find(class_='artTitle') is not None:
        # the preview cuts off here; appending the title text is the
        # natural completion
        news_title.append(results_text.find(class_='artTitle').text)
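news_date is declared but never populated in the preview. A hedged sketch of the matching step, with 'article_schedule' as a stand-in class name; it is hypothetical, so inspect the actual page markup before relying on it:

# hypothetical: 'article_schedule' is a stand-in for whatever class the
# site uses to mark the publish date; confirm against the real HTML
        date_tag = results_text.find(class_='article_schedule')
        if date_tag is not None:
            news_date.append(date_tag.text.strip())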
# remove duplicate news article links before fetching the pages
unique_links = list(set(data))
# now iterate over each page to get the news links
for pages in page_link:
    html = requests.get(pages)  # fetch one pagination page
    soup = BeautifulSoup(html.text, 'html.parser')
    sub_links = soup.find_all('a', class_='arial11_summ')
    for links in sub_links:
        sp = BeautifulSoup(str(links), 'html.parser')  # re-parse to get the href of the link
        tag = sp.a
        category_links = 'https://www.moneycontrol.com' + tag["href"]
        # the preview cuts off here; collecting the link is the natural completion
        data.append(category_links)
# blank list that will collect the links of all news articles
data = []
# start with the link of the first page, as defined previously
page_link = [url_list]
# now populate page_link with the remaining pagination links
html = requests.get(url_list)
soup = BeautifulSoup(html.text, 'html.parser')
sub_links = soup.find_all('div', class_='pages MR10 MT15')  # the pagination container
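The preview stops after locating the pagination container. A hedged sketch of the step that would follow, pulling each page's href out of the div; the anchor structure inside the container is an assumption:

# hypothetical continuation: walk the pagination div and collect each
# page's absolute URL into page_link
for div in sub_links:
    for a in div.find_all('a'):
        href = a.get('href')
        if href:
            page_link.append('https://www.moneycontrol.com' + href)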
# the stock name is defined with its NSE code as listed in NIFTY
# (it is needed later, when verifying the sentiment signal against
# real price data from Quandl)
stock_name = 'NSE/WIPRO'
url_list = 'https://name-of-news-website/stocks/company_info/stock_news.php?sc_id={}'.format('W')  # mention your preferred stock id here
url_list  # notebook-style display of the URL