Jose Luis Fernández Nuevo JLFDataScience

  • FGCSIC
@JLFDataScience
JLFDataScience / Loop_new_Teams_notification.py
Created November 25, 2022 09:52
Send a notification if there are new records compared with the last saved CSV
#Send a notification if there are new records compared with the last saved CSV
print("Scouting: BDNS | Spanish Grants Database")
#Decision: send the Teams notification only when there are new records
if len(df_diff) != 0:
    print('Notified to Teams')
    mymessageTeams(df_diff, title_sec, url_sec)
    driver.quit()
else:
    print('No new notifications')
    driver.quit()
@JLFDataScience
JLFDataScience / Teams_notification_function.py
Created November 25, 2022 09:51
Notification function with the pymsteams library
#Notification function with the pymsteams library
##generate a template of the message that will be sent to Teams
def mymessageTeams(df_diff, title_sec, url_sec):
    import pymsteams
    myTeamsMessage = pymsteams.connectorcard("url_connection_in_your_Teams_channel")
    #Initial text
    myTeamsMessage.text("@alerts | Scouting public Calls in Spain App by JLFDS")
    #Create the section
    myMessageSection = pymsteams.cardsection()
    #Section title
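    #The gist preview is cut off at this point; what follows is only a sketch of how the
    #function might finish, assuming title_sec holds the section title and url_sec a link
    myMessageSection.title(title_sec)
    #cardsection.text() keeps a single text block, so the new records are joined into one string
    body = "<br>".join(f"{row['Title']} | {row['link']}" for _, row in df_diff.iterrows())
    myMessageSection.text(body)
    myMessageSection.linkButton("Open BDNS", url_sec)
    #Attach the section and post the card to the channel webhook
    myTeamsMessage.addSection(myMessageSection)
    myTeamsMessage.send()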
@JLFDataScience
JLFDataScience / Compare_dfs_select_notduplicates.py
Created November 25, 2022 09:49
Generate a df with only the new, non-duplicated records used to trigger the alert
#Generate a df with only the new, non-duplicated records used to trigger the alert
#Load the old df saved at the previous revision
df_old = pd.read_csv(r'C:\Users\username\yourpath\Review_calls_innovation.csv')
df_old['BD_Code'] = df_old['BD_Code'].astype('string')
#Compare the two dfs and drop every BD_Code that appears in both (keep=False), leaving only the new records
df_diff = pd.concat([df, df_old]).reset_index(drop=True).drop_duplicates(subset=['BD_Code'], keep=False)
#Select the fields that will appear in the Teams notification
df_diff = df_diff[['BD_Code','Authority','Organization','Title','link']]
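The notification loop compares against the last saved CSV, so after a run the reference file presumably has to be refreshed. A minimal sketch, assuming the same path as above and that the overwrite happens once the notification has been handled:
#Overwrite the reference CSV with the current snapshot so the next run compares against it
#(a sketch; the path and the exact moment of the overwrite are assumptions)
df.to_csv(r'C:\Users\username\yourpath\Review_calls_innovation.csv', index=False)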
@JLFDataScience
JLFDataScience / pyshorteners_library.py
Created November 24, 2022 17:23
Shorten the URLs in the 'link' field so they read better in Teams, using the pyshorteners library
#Shorten the URLs in the 'link' field so they read better in Teams, using the pyshorteners library
import pyshorteners as ps
s = ps.Shortener()
df['link'] = df['link'].apply(lambda x: s.chilpit.short(x))
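chilp.it is an external service and can time out; a hedged variant falls back to the original URL when shortening fails (the safe_short helper is illustrative, not part of the original gist):
s = ps.Shortener()

def safe_short(url):
    #Return the shortened URL, or the original one if chilp.it is unreachable (illustrative helper)
    try:
        return s.chilpit.short(url)
    except Exception:
        return url

df['link'] = df['link'].apply(safe_short)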
@JLFDataScience
JLFDataScience / Create_df_dictionary.py
Created November 24, 2022 17:21
Create a df from the extracted data
#Create a df from the extracted data
columns = ['Number','BD_Code','MRR','Authority','Organization','Depart','Date','Title', 'Title2', 'link', 'cod_desc']
df = pd.DataFrame(data)
df.columns = columns
#Keep only the columns needed to generate the alert
df = df[['BD_Code','MRR','Authority','Organization','Date','Title', 'link']]
#Ensure the BD_Code column is of string type
df['BD_Code'] = df['BD_Code'].astype('string')
#Lowercase the title so it takes up less space in the Teams notification
df['Title'] = df['Title'].str.lower()
@JLFDataScience
JLFDataScience / Selenium_select_innerHTMLTable.py
Last active November 25, 2022 10:24
Extract the HTML data of a table with Selenium
#Select the HTML of the results table
tabla = driver.find_element_by_xpath('//*[@id="grid"]')
#Parse the table HTML with BeautifulSoup
soup = BS(tabla.get_attribute('innerHTML'), "html.parser")
#Extract the data from the table rows (the header row is skipped)
data = []
rows = soup.find_all('tr')[1:]
for row in rows:
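    #The gist preview is truncated here; a minimal sketch of the loop body,
    #assuming each row yields one value per column in the `columns` list used later
    cells = row.find_all('td')
    data.append([cell.get_text(strip=True) for cell in cells])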
@JLFDataScience
JLFDataScience / Selenium_conect_BDNS.py
Created November 24, 2022 17:16
Selenium template to connect to the BDNS
#Libraries
import requests
import numpy as np
import pandas as pd
import json
import time
import pymsteams
import pyshorteners as ps
from bs4 import BeautifulSoup as BS
from selenium import webdriver
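The preview stops at the imports. A minimal sketch of how the connection could continue, assuming a local Chrome driver and the public BDNS calls page (the URL and the fixed wait are assumptions):
#Open the BDNS calls search page and wait for the grid to render
#(a sketch; the URL, the driver setup and the wait time are assumptions)
driver = webdriver.Chrome()
driver.get('https://www.pap.hacienda.gob.es/bdnstrans/GE/es/convocatorias')
time.sleep(5)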
@JLFDataScience
JLFDataScience / scraping_missing_values.py
Created March 10, 2022 17:06
The difficulty of scraping with missing values
data = {
    'name': [],
    'urls': [],
    'position': [],
    'area': [],
    'year': [],
    'city': []
}
for tag in soup.find_all('article', 'box becari buscador'):
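    #The preview ends here; a hedged sketch of one way to tolerate missing values:
    #look each tag up first and append None when it is absent (the inner selectors are assumptions)
    name_tag = tag.find('h3')
    data['name'].append(name_tag.get_text(strip=True) if name_tag else None)
    link_tag = tag.find('a')
    data['urls'].append(link_tag['href'] if link_tag else None)
    for key in ('position', 'area', 'year', 'city'):
        field = tag.find('span', class_=key)
        data[key].append(field.get_text(strip=True) if field else None)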
@JLFDataScience
JLFDataScience / extract_json_to_pandas.py
Created February 3, 2022 18:21
Load a JSON file from a URL and convert the data to pandas
#Final download and transformation script into a DataFrame
open_status = 575 #open calls
close_status = 0 #close calls
url = 'https://api-manager.universia.net/santanderx-core/api/calls/find?offset='+str(close_status)+'&limit='+ str(open_status)
r = requests.get(url)
data = json.loads(r.text)
df_calls = pd.DataFrame(columns=['name', 'entityName', 'entityCountry', 'shortDescription', 'edition', 'status',
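The preview cuts off inside the column list. Assuming the parsed JSON is either a list of call objects or wraps them under a key, one hedged way to finish the transformation is:
#A sketch of filling df_calls from the parsed JSON
#(the top-level shape of `data` and the 'content' key are assumptions)
calls = data if isinstance(data, list) else data.get('content', [])
rows = [{col: call.get(col) for col in df_calls.columns} for call in calls]
df_calls = pd.concat([df_calls, pd.DataFrame(rows)], ignore_index=True)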
@JLFDataScience
JLFDataScience / app.py
Created June 11, 2021 16:27
Python script for a web app deployed on the Heroku cloud
#import urllib.request
import numpy as np
import requests
import pandas as pd
import json
#import io
#import warnings
#import time
import streamlit as st
#import plotly.express as px
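The preview shows only the imports. A minimal Streamlit skeleton of the kind that would follow, purely illustrative (the title, widget and data source are assumptions); on Heroku such an app is usually started from a Procfile along the lines of web: streamlit run app.py --server.port=$PORT.
#Minimal Streamlit skeleton (illustrative only; the real app's layout and data source are not in the preview)
st.title("Scouting public calls")
url = st.text_input("API URL with the calls data")
if url:
    data = json.loads(requests.get(url).text)
    st.dataframe(pd.DataFrame(data))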