Alaa Khaled alaakh42

## gist:6aca4453ad3a460177d3d02805a4015f
Multicollinearity → the situation in which one independent variable depends on (can be predicted from) another independent variable, like the 2 dummy variables New York and California.
For example, in the case of a ‘Dummy Variable’ - which is the encoding of a categorical variable into numerical variables - if there are 2 levels in a categorical column called ‘State’: [New York, California]
State
New York
California
California
New York
California

then the dummy variables will satisfy D2 = 1 - D1, i.e. D2 is completely determined by D1.

## keras_gensim_embeddings.py
from __future__ import print_function

import json
import os
import numpy as np

from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
from keras.engine import Input
from keras.layers import Embedding, merge

## crawler1.py
import re
import urllib
import requests
import wikipedia
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',

## crawler2.py
# Per-team accumulators, one list per scraped attribute. The same names are
# used as the DataFrame columns in the crawler3.py snippet.
first_col = []
teams_links = []
summary = []
history = []
stadiums = []
locations = []
stadiums_capcity = []
# Look up the table whose inline style is "text-align: left;" and then its
# <tbody>. `soup` is created outside this excerpt (presumably a BeautifulSoup
# document -- confirm). NOTE(review): if it is, find() returns None when nothing
# matches, so the second lookup would raise AttributeError on a layout change.
table = soup.find("table", style="text-align: left;")
table_body = table.find("tbody")

## crawler3.py
# Gather the scraped per-team lists under their column labels, then build the
# table in one step.
team_columns = {
    'Team': first_col,
    'Summary': summary,
    'History': history,
    'Team_Page': teams_links,
    'Location': locations,
    'Stadium': stadiums,
    'Stadiums_Capcity': stadiums_capcity,
}
df = pd.DataFrame(team_columns)

# Crawling the history of those three clubs was somewhat tricky, so I had to hard-code the section names myself

## laliga_intents_config.yml
# NLU configuration: a language code plus a list of named pipeline components.
language: "en"

# Components in the order they are listed. Going by the names: spaCy-based
# tokenization and featurization, regex features, CRF entity extraction with
# synonym mapping, and an sklearn intent classifier.
# NOTE(review): confirm the component names against the rasa_nlu version in use.
pipeline:
- name: "nlp_spacy"
- name: "tokenizer_spacy"
- name: "intent_entity_featurizer_regex"
- name: "intent_featurizer_spacy"
- name: "ner_crf"
- name: "ner_synonyms"
- name: "intent_classifier_sklearn"

## generate_intent.py
from rasa_nlu.model import Interpreter
import json

# Load the interpreter from the model directory (path is relative to the
# current working directory).
interpreter = Interpreter.load("./models/current/nlu")

# Normalize whitespace in the question. str.split() with no arguments already
# splits on runs of whitespace and discards leading/trailing blanks, so the
# tokens need no extra strip().
message = ' '.join("tell me about the foundation of the Barcelona team".split())

# Parse the message and pretty-print the result as JSON.
result = interpreter.parse(message)
print(json.dumps(result, indent=2))

## intent.json
{
  "entities": [],
  "intent": {
    "confidence": 0.9892104618120521,
    "name": "laliga_questions"
  },
  "text": "tell me about the foundation of the Barcelona team",
  "intent_ranking": [
    {
      "confidence": 0.9892104618120521,

## generate_bot_response.py
import spacy
import random

# Canned reply pools, grouped by the kind of message they answer. `random` is
# imported above, presumably to pick one entry per response -- the selection
# code is outside this excerpt.

# "You are welcome" style replies.
urw_replies = [u'Happy to help you :)', u'You are more than Welcome! :)', u'No problem, Anytime! :)']
# Fallback replies for messages the bot cannot handle.
nonsense_replies = [u"Sorry, I don't understand what you are saying", u"Sorry, I cannot help you on that!",
                    u"That's not my area of expertise"]
# Greeting replies.
break_the_ice_replies = [u"Hello, Anything I can do for you?", u"Hi there! How can I help you today?"]
# Short acknowledgements; both end with a space, presumably so that more text
# can be appended after them -- confirm against the caller.
friendly_replies = [u"Sure! ", u"Sure! Let's see what we have here "]

# Load the spaCy model named 'xx_ent_wiki_sm' once at import time; the model
# must be installed in the environment.
nlp = spacy.load('xx_ent_wiki_sm')

## app.py
#Python libraries that we need to import for our bot
import random
from flask import Flask, request
from pymessenger.bot import Bot
from core import get_bot_response

# Flask application object for the bot.
app = Flask(__name__)

# The two token values below were redacted to placeholder comments, so these
# lines are not valid Python until real values are filled in.
# NOTE(review): the VERIFY_TOKEN placeholder also reads <ACCESS_TOKEN>; this
# looks like a copy-paste slip -- supply the verify token there.
ACCESS_TOKEN = #<ACCESS_TOKEN>
VERIFY_TOKEN = #<ACCESS_TOKEN>
	Multicollinearity → the situation in which one independent variable depends on (can be predicted from) another independent variable, like the 2 dummy variables New York and California.
	For example, in the case of a ‘Dummy Variable’ - which is the encoding of a categorical variable into numerical variables - if there are 2 levels in a categorical column called ‘State’: [New York, California]
	State
	New York
	California
	California
	New York
	California

	then the dummy variables will satisfy D2 = 1 - D1, i.e. D2 is completely determined by D1.
	from __future__ import print_function

	import json
	import os
	import numpy as np

	from gensim.models import Word2Vec
	from gensim.utils import simple_preprocess
	from keras.engine import Input
	from keras.layers import Embedding, merge
	import re
	import urllib
	import requests
	import wikipedia
	import numpy as np
	import pandas as pd
	from bs4 import BeautifulSoup

	hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	# Per-team accumulators, one list per scraped attribute. The same names are
	# passed as the DataFrame columns further down.
	first_col = []
	teams_links = []
	summary = []
	history = []
	stadiums = []
	locations = []
	stadiums_capcity = []
	# Look up the table whose inline style is "text-align: left;" and then its
	# <tbody>. `soup` is created outside this excerpt (presumably a BeautifulSoup
	# document -- confirm). NOTE(review): if it is, find() returns None when nothing
	# matches, so the second lookup would raise AttributeError on a layout change.
	table = soup.find("table", style="text-align: left;")
	table_body = table.find("tbody")
	# Gather the scraped per-team lists under their column labels, then build the
	# table in one step.
	team_columns = {
	    'Team': first_col,
	    'Summary': summary,
	    'History': history,
	    'Team_Page': teams_links,
	    'Location': locations,
	    'Stadium': stadiums,
	    'Stadiums_Capcity': stadiums_capcity,
	}
	df = pd.DataFrame(team_columns)

	# Crawling the history of those three clubs was somewhat tricky, so I had to hard-code the section names myself
	# NLU configuration: a language code plus a list of named pipeline components.
	language: "en"

	# Components in the order they are listed. Going by the names: spaCy-based
	# tokenization and featurization, regex features, CRF entity extraction with
	# synonym mapping, and an sklearn intent classifier.
	# NOTE(review): confirm the component names against the rasa_nlu version in use.
	pipeline:
	- name: "nlp_spacy"
	- name: "tokenizer_spacy"
	- name: "intent_entity_featurizer_regex"
	- name: "intent_featurizer_spacy"
	- name: "ner_crf"
	- name: "ner_synonyms"
	- name: "intent_classifier_sklearn"
	from rasa_nlu.model import Interpreter
	import json

	# Load the interpreter from the model directory (path is relative to the
	# current working directory).
	interpreter = Interpreter.load("./models/current/nlu")

	# Normalize whitespace in the question. str.split() with no arguments already
	# splits on runs of whitespace and discards leading/trailing blanks, so the
	# tokens need no extra strip().
	message = ' '.join("tell me about the foundation of the Barcelona team".split())

	# Parse the message and pretty-print the result as JSON.
	result = interpreter.parse(message)
	print(json.dumps(result, indent=2))
	{
	"entities": [],
	"intent": {
	"confidence": 0.9892104618120521,
	"name": "laliga_questions"
	},
	"text": "tell me about the foundation of the Barcelona team",
	"intent_ranking": [
	{
	"confidence": 0.9892104618120521,
	import spacy
	import random

	# Canned reply pools, grouped by the kind of message they answer. `random` is
	# imported above, presumably to pick one entry per response -- the selection
	# code is outside this excerpt.

	# "You are welcome" style replies.
	urw_replies = [u'Happy to help you :)', u'You are more than Welcome! :)', u'No problem, Anytime! :)']
	# Fallback replies for messages the bot cannot handle.
	nonsense_replies = [u"Sorry, I don't understand what you are saying", u"Sorry, I cannot help you on that!",
	u"That's not my area of expertise"]
	# Greeting replies.
	break_the_ice_replies = [u"Hello, Anything I can do for you?", u"Hi there! How can I help you today?"]
	# Short acknowledgements; both end with a space, presumably so that more text
	# can be appended after them -- confirm against the caller.
	friendly_replies = [u"Sure! ", u"Sure! Let's see what we have here "]

	# Load the spaCy model named 'xx_ent_wiki_sm' once at import time; the model
	# must be installed in the environment.
	nlp = spacy.load('xx_ent_wiki_sm')
	#Python libraries that we need to import for our bot
	import random
	from flask import Flask, request
	from pymessenger.bot import Bot
	from core import get_bot_response

	# Flask application object for the bot.
	app = Flask(__name__)

	# The two token values below were redacted to placeholder comments, so these
	# lines are not valid Python until real values are filled in.
	# NOTE(review): the VERIFY_TOKEN placeholder also reads <ACCESS_TOKEN>; this
	# looks like a copy-paste slip -- supply the verify token there.
	ACCESS_TOKEN = #<ACCESS_TOKEN>
	VERIFY_TOKEN = #<ACCESS_TOKEN>