Gaurav Chauhan (2796gaurav)
#standardSQL
CREATE MODEL `bqml_tutorial.natality_model`  # model name
OPTIONS
  (model_type='linear_reg',  # one of 'LINEAR_REG' | 'LOGISTIC_REG' | 'KMEANS' | 'TENSORFLOW'
   input_label_cols=['weight_pounds']) AS  # the column the model learns to predict
SELECT  # input feature columns plus the label
  weight_pounds,
  is_male,
  gestation_weeks,
  mother_age
FROM
  # the preview cuts off here; the public natality sample table from the
  # BigQuery ML tutorial is assumed as the source
  `bigquery-public-data.samples.natality`
WHERE
  weight_pounds IS NOT NULL
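Once CREATE MODEL finishes, the trained model can also be checked from Python. A minimal sketch, assuming the google-cloud-bigquery package is installed and default credentials point at the project that owns the bqml_tutorial dataset:

# minimal sketch, assuming google-cloud-bigquery and configured credentials
from google.cloud import bigquery

client = bigquery.Client()

# ML.EVALUATE reports regression metrics such as mean_absolute_error
eval_sql = """
SELECT * FROM ML.EVALUATE(MODEL `bqml_tutorial.natality_model`)
"""
for row in client.query(eval_sql).result():
    print(dict(row))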
# get our tweets into a dataframe
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer  # assumes the vaderSentiment package

tweet_data = pd.DataFrame(data=tweet_text, columns=['data'])
# create analyzer object
analyzer = SentimentIntensityAnalyzer()
# as before, accumulate a sentiment score per unique tweet
final_scores = 0
for tweet in list(set(tweet_text)):
    # the preview cuts off here; summing VADER's compound score
    # (a normalized value in [-1, 1]) is the natural completion
    final_scores += analyzer.polarity_scores(tweet)['compound']
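The running total only becomes a comparable signal once it is averaged over the tweets scored. A short hedged continuation; the averaging step is not in the preview:

# hypothetical continuation: average the accumulated compound scores so the
# result does not depend on how many unique tweets were fetched
unique_tweets = list(set(tweet_text))
final_sentiment = final_scores / len(unique_tweets) if unique_tweets else 0.0
print('average tweet sentiment: {:.3f}'.format(final_sentiment))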
# Authorization and API setup
import tweepy

auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
auth.set_access_token('ACCESS_KEY', 'ACCESS_KEY_SECRET')
api = tweepy.API(auth)
# Get tweets using the Tweepy search API
max_tweets = 1000
name = 'WIPRO'  # the query we want to search for on Twitter
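The preview stops before the search call itself. A hedged sketch of collecting the tweet text with tweepy's Cursor, assuming the pre-v2 standard search endpoint that this era of tweepy exposed:

# hypothetical completion: page through search results and keep the text;
# api.search was the tweepy 3.x name (renamed api.search_tweets in v4)
tweet_text = []
for tweet in tweepy.Cursor(api.search, q=name, lang='en').items(max_tweets):
    tweet_text.append(tweet.text)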
import quandl  # assumes the quandl package

quandl.ApiConfig.api_key = 'WRITE YOUR ACCOUNT API KEY'
# the dates below match when this code was written; adjust them to your needs.
# the news link we defined initially returned roughly the past six months of
# stories, so the six months between start_date and end_date mirror that window
stock_data = quandl.get(stock_name, start_date='2018-03-01', end_date='2018-08-31')
stock_data.reset_index(level=0, inplace=True)
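The price frame can be sanity-checked with a quick chart before mixing in sentiment. A minimal sketch, assuming a recent plotly is installed, that 'Date' comes from the reset index, and that 'Close' is present as in Quandl's NSE datasets:

# minimal sketch, assuming plotly and the 'Date'/'Close' columns noted above
import plotly.graph_objs as go

fig = go.Figure(data=[go.Scatter(x=stock_data['Date'], y=stock_data['Close'], mode='lines')])
fig.update_layout(title='NSE/WIPRO close, Mar-Aug 2018', xaxis_title='Date', yaxis_title='Close')
fig.show()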
# to visualise with plotly we will divide our dataset into positive and negative
# using VADER for the sentiment analysis
# running score, defined as 0 initially
sent = 0
sent_val = []
analyzer = SentimentIntensityAnalyzer()
# loop over each title and score it
for title in data_nw['title']:
    # the preview cuts off here; scoring each title and keeping the value
    # is the natural completion
    sent = analyzer.polarity_scores(title)['compound']
    sent_val.append(sent)
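The comment above promises a positive/negative split for the plotly view. A hedged sketch of that split, assuming sent_val ends up holding one compound score per title:

# hypothetical continuation: attach the scores and split the frame for plotting
data_nw['sentiment'] = sent_val
positive_news = data_nw[data_nw['sentiment'] > 0]
negative_news = data_nw[data_nw['sentiment'] <= 0]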
import requests
from bs4 import BeautifulSoup

news_title = []  # collects the news titles
news_date = []   # collects the date each story was published
# loop over each unique article link and pull out the title
for results_url in unique_links:
    results = requests.get(results_url)
    results_text = BeautifulSoup(results.text, 'html.parser')
    if results_text.find(class_='artTitle') is not None:
        # the preview cuts off here; appending the title text is the
        # natural completion
        news_title.append(results_text.find(class_='artTitle').text)
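news_date is declared but never populated in the preview. A hedged sketch of the matching step, with 'article_schedule' as a stand-in class name; it is hypothetical, so inspect the actual page markup before relying on it:

# hypothetical: 'article_schedule' is a stand-in for whatever class the
# site uses to mark the publish date; confirm against the real HTML
        date_tag = results_text.find(class_='article_schedule')
        if date_tag is not None:
            news_date.append(date_tag.text.strip())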
# remove duplicate news article links before fetching the pages
unique_links = list(set(data))
# now iterate over each page to get the news links
for pages in page_link:
    html = requests.get(pages)  # fetch one pagination page
    soup = BeautifulSoup(html.text, 'html.parser')
    sub_links = soup.find_all('a', class_='arial11_summ')
    for links in sub_links:
        sp = BeautifulSoup(str(links), 'html.parser')  # re-parse to get the href of the link
        tag = sp.a
        category_links = 'https://www.moneycontrol.com' + tag["href"]
        # the preview cuts off here; collecting the link is the natural completion
        data.append(category_links)
# blank list that will collect the links of all news articles
data = []
# start with the link of the first page, as defined previously
page_link = [url_list]
# now populate page_link with the remaining pagination links
html = requests.get(url_list)
soup = BeautifulSoup(html.text, 'html.parser')
sub_links = soup.find_all('div', class_='pages MR10 MT15')  # the pagination container
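The preview stops after locating the pagination container. A hedged sketch of the step that would follow, pulling each page's href out of the div; the anchor structure inside the container is an assumption:

# hypothetical continuation: walk the pagination div and collect each
# page's absolute URL into page_link
for div in sub_links:
    for a in div.find_all('a'):
        href = a.get('href')
        if href:
            page_link.append('https://www.moneycontrol.com' + href)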
# the stock name is defined with its NSE code as listed in NIFTY
# (it is needed later, when verifying the sentiment signal against
# real price data from Quandl)
stock_name = 'NSE/WIPRO'
url_list = 'https://name-of-news-website/stocks/company_info/stock_news.php?sc_id={}'.format('W')  # mention your preferred stock id here
url_list  # notebook-style display of the URL