Skip to content

Instantly share code, notes, and snippets.

# 5 Years Data
DATA_RANGE_DAYS = 365*5  # look-back window in days (~5 years; leap days not counted)
END_DATE = date.today()  # NOTE: evaluated once, at import time — goes stale in a long-running process
START_DATE = END_DATE - timedelta(DATA_RANGE_DAYS)  # inclusive start of the 5-year window
def get_stock_data(scrip, start_date=None, end_date=None, is_index=False):
    """Fetch historical price data for *scrip* via nsepy's get_history.

    Parameters
    ----------
    scrip : str
        NSE symbol (or index name when ``is_index`` is True).
    start_date, end_date : datetime.date, optional
        Date range to fetch. When omitted, defaults to the last
        DATA_RANGE_DAYS days ending today, computed **at call time**.
        (The original bound the module-level START_DATE/END_DATE as
        defaults, which are frozen at import time and go stale.)
    is_index : bool
        Passed through to get_history's ``index`` flag.

    Returns
    -------
    Whatever get_history returns (a pandas DataFrame in nsepy).
    """
    if end_date is None:
        end_date = date.today()
    if start_date is None:
        start_date = end_date - timedelta(DATA_RANGE_DAYS)
    return get_history(symbol=scrip, start=start_date, end=end_date, index=is_index)
# Rasa domain fragment: intents the NLU model classifies, entities it
# extracts, and slots the tracker stores between turns.
intents:
- goodbye
- greet
- recent_matches    # handled by action_get_recent_matches
- upcoming_matches  # handled by action_get_upcoming_matches
entities:
- team
slots:
  # Free-text slot filled from the "team" entity.
  team:
    type: text
class ActionGetUpcomingMatches(Action):
    """Rasa custom action: fetch upcoming matches from the CricAPI
    ``matches`` endpoint."""

    def name(self):
        # Identifier referenced from the domain file / stories.
        return 'action_get_upcoming_matches'

    def run(self, dispatcher, tracker, domain):
        # Query the "matches" endpoint; API key and base URL come from
        # module-level constants not visible in this chunk.
        res = requests.get(CRIC_API_URL + "matches" + "?apikey=" + CRIC_API_KEY + "&offset=0")
        if res.status_code == 200:
            # NOTE(review): chunk appears truncated — matches_data is
            # parsed but never dispatched to the user here.
            matches_data = res.json()["data"]
class ActionGetRecentMatches(Action):
    """Rasa custom action: fetch current/recent matches from the CricAPI
    ``currentMatches`` endpoint."""

    def name(self):
        # Identifier referenced from the domain file / stories.
        return 'action_get_recent_matches'

    def run(self, dispatcher, tracker, domain):
        res = requests.get(CRIC_API_URL + "currentMatches" + "?apikey=" + CRIC_API_KEY + "&offset=0")
        if res.status_code == 200:
            matches_data = res.json()["data"]
            # Newest first, by the API's "date" field (string compare on
            # whatever format the API returns — TODO confirm it sorts
            # chronologically).
            matches_data.sort(key=lambda x: x["date"], reverse=True)
            # Keep only entries that carry a "matchType" key.
            # NOTE(review): chunk appears truncated — the filtered list
            # is never dispatched to the user here.
            matches_data = [x for x in matches_data if "matchType" in x]
# Batch mode: walk docs_path and parse every PDF found there,
# accumulating keywords across files.
docs_path = 'sample_files'
# Tokens to exclude from keyword extraction (figure labels, filler words).
ignore_words = ['Fig','like','e.g.','i.e.','one']
all_keywords = []
for filename in os.listdir(docs_path):
    filepath = os.path.join(docs_path, filename)
    # Only regular files with a .pdf extension are parsed.
    if os.path.isfile(filepath) and filename.endswith('.pdf'):
        # NOTE(review): the literal '(unknown)' looks like a scrape
        # artifact — the original was presumably f'Parsing file: {filepath}'.
        print(f'Parsing file: (unknown)')
        try:
            # NOTE(review): chunk is truncated here — the except clause
            # and the rest of the loop body are not visible.
            file_text = read_file(filepath)
# Single-file demo: read one PDF, extract keywords of at least 3
# characters, and render a word cloud capped at 200 words.
filepath = 'sample_files/AI_Blockchain_Survey_2022.pdf'
file_text = read_file(filepath)
keywords = extract_keywords(file_text, min_word_length = 3)
create_word_cloud(keywords,maximum_words=200)
def create_word_cloud(keywords, maximum_words = 100, bg = 'white', cmap='Dark2',
                      maximum_font_size = 256, width = 3000, height = 2000,
                      random_state = 42, fig_w = 15, fig_h = 10, output_filepath = None):
    """Render a word cloud from a list of keyword tokens.

    keywords          : list of token strings (duplicates drive word size).
    maximum_words     : cap on the number of words drawn.
    bg / cmap         : background color and matplotlib colormap name.
    maximum_font_size : font size of the most frequent word.
    width / height    : canvas size in pixels.
    random_state      : seed for reproducible layout.
    fig_w / fig_h     : matplotlib figure size in inches.
    output_filepath   : when set, presumably saves the figure — TODO
                        confirm; that code is outside this chunk.
    """
    # Count occurrences of each keyword; frequency determines word size.
    word_could_dict=Counter(keywords)
    # NOTE(review): chunk is truncated — the WordCloud(...) call (and the
    # plotting code) continues past the visible lines.
    wordcloud = WordCloud(background_color=bg, max_words=maximum_words, colormap=cmap,
                          stopwords=STOPWORDS, max_font_size=maximum_font_size,
                          random_state=random_state,
def extract_keywords(text, ignore_words = [],
                     min_word_length = 0,
                     ignore_numbers = True,
                     ignore_case = True):
    """Tokenize *text* and filter the tokens into a keyword list.

    NOTE(review): ``ignore_words = []`` is a mutable default argument —
    shared across calls if ever mutated; prefer ``ignore_words=None``
    with a fallback inside the body.

    text            : input string to tokenize.
    ignore_words    : tokens to drop from the result.
    min_word_length : minimum token length to keep.
    ignore_numbers  : presumably drops numeric tokens — confirm; the
                      filtering code is outside this chunk.
    ignore_case     : presumably lowercases tokens — confirm likewise.
    """
    # Drop characters outside string.printable (strips non-ASCII noise
    # common in PDF extraction).
    filtered_text = ''.join(filter(lambda x:x in string.printable, text))
    # NLTK word tokenizer; chunk is truncated after this line — the
    # actual filtering and return statement are not visible.
    tokens = word_tokenize(filtered_text)
def read_file(filepath, use_method = 'textract'):
    """Extract text from a PDF file using the chosen backend.

    Parameters
    ----------
    filepath : str
        Path to the PDF file.
    use_method : str
        'textract' (default) or 'pypdf' — selects the extraction backend.

    Returns
    -------
    str
        Extracted text. Returns '' (instead of the original implicit
        None) when the path is not a regular file or the backend name is
        unrecognized, so callers can safely tokenize/concatenate the
        result.
    """
    # Guard clause: bail out early on a bad path.
    if not os.path.isfile(filepath):
        print(f'Invalid file:{filepath}')
        return ""
    if use_method == 'textract':
        return read_file_textract(filepath)
    if use_method == 'pypdf':
        return read_file_pypdf(filepath)
    # Unknown backend: best-effort empty result rather than implicit None.
    return ""
def read_file_pypdf(filepath):
    """Extract text from every page of a PDF using PyPDF2.

    Uses the legacy PyPDF2 API (PdfFileReader / getPage / extractText)
    that the rest of this file already relies on.

    Parameters
    ----------
    filepath : str
        Path to the PDF file.

    Returns
    -------
    str
        Concatenated text of all pages.
    """
    # with-block guarantees the file handle is closed — the original
    # opened it and never closed it (resource leak).
    with open(filepath, 'rb') as pdf_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        # Extract while the file is still open: PyPDF2 reads page data
        # lazily from the underlying stream.
        pages_text = [reader.getPage(pg).extractText()
                      for pg in range(reader.numPages)]
    # Single join instead of repeated string concatenation.
    return "".join(pages_text)