This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Default history window: five years of data, approximated as 5 * 365 days
# (leap days are not accounted for).
DATA_RANGE_DAYS = 365 * 5
# Both bounds are computed once, at the moment this module is executed.
END_DATE = date.today()
START_DATE = END_DATE - timedelta(days=DATA_RANGE_DAYS)
def get_stock_data(scrip, start_date=START_DATE, end_date=END_DATE, is_index=False):
    """Fetch historical data for ``scrip`` over a date range.

    Thin wrapper that forwards its arguments to ``get_history``.

    Args:
        scrip: Symbol to look up; forwarded as ``symbol``.
        start_date: Start of the range; defaults to ``START_DATE``.
        end_date: End of the range; defaults to ``END_DATE``.
        is_index: Forwarded as ``index``; presumably marks ``scrip`` as an
            index rather than a stock -- confirm against ``get_history``.

    Returns:
        Whatever ``get_history`` returns for the requested range.
    """
    # NOTE: the default dates are bound when the function is defined, so a
    # long-running process keeps using the dates computed at import time.
    return get_history(
        symbol=scrip,
        start=start_date,
        end=end_date,
        index=is_index,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Conversation domain fragment (looks like a Rasa domain file -- confirm).

# Intents the assistant can recognise.
intents:
  - goodbye
  - greet
  - recent_matches
  - upcoming_matches

# Entities that can be extracted from user messages.
entities:
  - team

# Slots: `team` is stored as free text.
slots:
  team:
    type: text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ActionGetUpcomingMatches(Action): | |
def name(self): | |
return 'action_get_upcoming_matches' | |
def run(self, dispatcher, tracker, domain): | |
res = requests.get(CRIC_API_URL + "matches" + "?apikey=" + CRIC_API_KEY + "&offset=0") | |
if res.status_code == 200: | |
matches_data = res.json()["data"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ActionGetRecentMatches(Action): | |
def name(self): | |
return 'action_get_recent_matches' | |
def run(self, dispatcher, tracker, domain): | |
res = requests.get(CRIC_API_URL + "currentMatches" + "?apikey=" + CRIC_API_KEY + "&offset=0") | |
if res.status_code == 200: | |
matches_data = res.json()["data"] | |
matches_data.sort(key=lambda x: x["date"], reverse=True) | |
matches_data = [x for x in matches_data if "matchType" in x] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
docs_path = 'sample_files' | |
ignore_words = ['Fig','like','e.g.','i.e.','one'] | |
all_keywords = [] | |
for filename in os.listdir(docs_path): | |
filepath = os.path.join(docs_path, filename) | |
if os.path.isfile(filepath) and filename.endswith('.pdf'): | |
print(f'Parsing file: {filename}') | |
try: | |
file_text = read_file(filepath) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a word cloud from a single sample PDF.
filepath = 'sample_files/AI_Blockchain_Survey_2022.pdf'
file_text = read_file(filepath)
# min_word_length=3 presumably drops tokens shorter than three characters --
# confirm against extract_keywords.
keywords = extract_keywords(file_text, min_word_length=3)
# Cap the rendered cloud at 200 words.
create_word_cloud(keywords, maximum_words=200)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_word_cloud(keywords, maximum_words = 100, bg = 'white', cmap='Dark2', | |
maximum_font_size = 256, width = 3000, height = 2000, | |
random_state = 42, fig_w = 15, fig_h = 10, output_filepath = None): | |
# Convert keywords to dictionary with values and its occurences | |
word_could_dict=Counter(keywords) | |
wordcloud = WordCloud(background_color=bg, max_words=maximum_words, colormap=cmap, | |
stopwords=STOPWORDS, max_font_size=maximum_font_size, | |
random_state=random_state, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_keywords(text, ignore_words = [], | |
min_word_length = 0, | |
ignore_numbers = True, | |
ignore_case = True): | |
# Remove words with special characters | |
filtered_text = ''.join(filter(lambda x:x in string.printable, text)) | |
# Create word tokens from the text string | |
tokens = word_tokenize(filtered_text) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_file(filepath, use_method = 'textract'): | |
text = "" | |
if not os.path.isfile(filepath): | |
print(f'Invalid file:{filepath}') | |
else: | |
if use_method == 'textract': | |
return read_file_textract(filepath) | |
elif use_method == 'pypdf': | |
return read_file_pypdf(filepath) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_file_pypdf(filepath):
    """Extract the text of every page of a PDF using PyPDF2.

    Args:
        filepath: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated in page order, or ``""`` when the
        document has no pages.

    Raises:
        OSError: If ``filepath`` cannot be opened for reading.
    """
    # NOTE(review): PdfFileReader / numPages / getPage / extractText are the
    # legacy PyPDF2 names, removed in PyPDF2 3.0 -- confirm the pinned version
    # before upgrading the dependency.
    #
    # The context manager guarantees the file handle is closed, even when
    # parsing fails. Extraction stays inside the ``with`` block because the
    # reader pulls page data from the open stream.
    with open(filepath, 'rb') as pdf_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        return "".join(
            reader.getPage(page_index).extractText()
            for page_index in range(reader.numPages)
        )
NewerOlder