Skip to content

Instantly share code, notes, and snippets.

@webserveis
Last active November 3, 2022 11:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save webserveis/78ea9677c935ef2c90043100c69befdb to your computer and use it in GitHub Desktop.
Save webserveis/78ea9677c935ef2c90043100c69befdb to your computer and use it in GitHub Desktop.
Cercanias, procesado de datos abiertos para los horarios de trenes

Dependencias

  • Servidor Mysql
  • Instalar Python 3+
  • Instalar dependencias de python, pandaz, mysql, alive-progress $> pip install mysql-connector-python $> pip install pandas $> pip install alive-progress

Fuente de datos

La misma renfe ofrece los datos abiertos para cercanias Horario de los servicios de viajeros prestados por los trenes Cercanías y Rodalies. Se pueden consultar en http://renfe.com https://data.renfe.com/dataset/horarios-cercanias Descargar el zip y extraer los archivos .txt

Base de datos

En MYSQL Ejecutar los scripts cercanias_db.sql para crear las tablas

Procesado de datos

editar cercanias.py y configurar el servidor SQL Para procesar los datos abiertos de cercanias, ejecutar el script cercanias.py a la misma raiz a donde se han extraido los datos.

$> python cercanias.py

from alive_progress import alive_bar
import mysql.connector
import csv
import pandas as pd
import re
import time
#https://dev.mysql.com/doc/connector-python/en/connector-python-example-cursor-transaction.html
#============================ EDIT MYSQL CONFIGURATION ============================
DB_HOST = 'localhost'
DB_USER = 'root'
DB_PASSWORD = ''
DB_NAME = 'cercanias'
#================================== END OF EDIT ===================================
def cleanStr(s) :
return re.sub("\s\s+" , " ", s).replace("Ñ", "ñ")
def calendarParser(cnx):
cursor = cnx.cursor()
add_item = ("INSERT INTO calendar "
"(service_id, monday, tuesday, wednesday, thursday, friday, saturday, sunday, start_date, end_date) "
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
cursor.execute("TRUNCATE TABLE calendar")
print("Deleted calendar table data (✓)")
with open('calendar.txt', newline='', encoding="utf-8") as csvfile:
df = pd.read_csv(csvfile)
df.columns = df.columns.str.strip()
total = len(df.index)
with alive_bar(total, dual_line=True, title='Calendar ') as bar:
for index, row in df.iterrows():
#print(index+1, total, row)
data_item = (
row['service_id'].strip(),
row['monday'],
row['tuesday'],
row['wednesday'],
row['thursday'],
row['friday'],
row['saturday'],
row['sunday'],
row['start_date'],
row['end_date'])
cursor.execute(add_item, data_item)
cnx.commit()
bar()
cursor.close()
def routesParser(cnx):
cursor = cnx.cursor()
add_item = ("INSERT INTO routes "
"(route_id, route_short_name, route_long_name, route_type, route_color, route_text_color) "
"VALUES (%s, %s, %s, %s, %s, %s)")
cursor.execute("TRUNCATE TABLE routes")
print("Deleted routes table data (✓)")
with open('routes.txt', newline='', encoding="utf-8") as csvfile:
df = pd.read_csv(csvfile)
df.columns = df.columns.str.strip()
total = len(df.index)
with alive_bar(total, dual_line=True, title='Routes ') as bar:
for index, row in df.iterrows():
data_item = (
row['route_id'].strip(),
row['route_short_name'],
cleanStr(row['route_long_name']),
row['route_type'],
row['route_color'],
row['route_text_color']
)
cursor.execute(add_item, data_item)
cnx.commit()
bar()
cursor.close()
def stopsParser(cnx):
cursor = cnx.cursor()
add_item = ("INSERT INTO stops "
"(stop_id, stop_name, stop_lat, stop_lon) "
"VALUES (%s, %s, %s, %s)")
cursor.execute("TRUNCATE TABLE stops")
print("Deleted stops table data (✓)")
with open('stops.txt', newline='', encoding="utf-8") as csvfile:
df = pd.read_csv(csvfile, dtype=str)
df.columns = df.columns.str.strip()
total = len(df.index)
with alive_bar(total, dual_line=True, title='Stops ') as bar:
for index, row in df.iterrows():
data_item = (
row['stop_id'].strip(),
cleanStr(row['stop_name']),
float(row['stop_lat']),
float(row['stop_lon'])
)
cursor.execute(add_item, data_item)
cnx.commit()
bar()
cursor.close()
def tripsParser(cnx):
cursor = cnx.cursor()
add_item = ("INSERT INTO trips "
"(route_id, service_id, trip_id, trip_headsign) "
"VALUES (%s, %s, %s, %s)")
cursor.execute("TRUNCATE TABLE trips")
print("Deleted trips table data (✓)")
with open('trips.txt', newline='', encoding="utf-8") as csvfile:
df = pd.read_csv(csvfile, dtype=str)
df.columns = df.columns.str.strip()
total = len(df.index)
with alive_bar(total, dual_line=True, title='Trips ') as bar:
for index, row in df.iterrows():
data_item = (
row['route_id'].strip(),
row['service_id'],
row['trip_id'],
None)
cursor.execute(add_item, data_item)
cnx.commit()
bar()
cursor.close()
def stopTimesParser(cnx):
cursor = cnx.cursor()
add_item = ("INSERT INTO stop_times "
"(trip_id, arrival_time, departure_time, stop_id, stop_sequence) "
"VALUES (%s, %s, %s, %s, %s)")
cursor.execute("TRUNCATE TABLE stop_times")
print("Deleted stop_times table data (✓)")
with open('stop_times.txt', newline='', encoding="utf-8") as csvfile:
df = pd.read_csv(csvfile, dtype=str)
df.columns = df.columns.str.strip()
total = len(df.index)
with alive_bar(total, dual_line=True, title='Stop Times') as bar:
for index, row in df.iterrows():
data_item = (
row['trip_id'].strip(),
row['arrival_time'],
row['departure_time'],
row['stop_id'],
cleanStr(row['stop_sequence'])
)
cursor.execute(add_item, data_item)
cnx.commit()
bar()
cursor.close()
def fetchCompleteData(cnx):
cursor = cnx.cursor()
with alive_bar(2, dual_line=True, title='Fetching..') as bar:
cursor.execute("UPDATE routes SET routes.region_id = SUBSTRING(routes.route_id, 1, 2);")
cnx.commit()
bar()
time.sleep(2)
cursor.execute("UPDATE routes SET routes.active = 1 WHERE routes.route_id IN (SELECT DISTINCT routes.route_id FROM routes INNER JOIN trips ON trips.route_id = routes.route_id WHERE EXISTS (SELECT routes.route_id FROM routes));")
cnx.commit()
bar()
cursor.close()
#================================== MAIN SCRIPT ===================================
cnx = mysql.connector.connect(user=DB_USER, password=DB_PASSWORD,
host=DB_HOST,
database=DB_NAME)
print("Cercanias File Parser v1.0.1")
calendarParser(cnx)
routesParser(cnx)
stopsParser(cnx)
tripsParser(cnx)
stopTimesParser(cnx)
fetchCompleteData(cnx)
cnx.close()
print("🏁🏁🏁 Finished All 🏁🏁🏁")
-- --------------------------------------------------------
-- Host: 127.0.0.1
-- Versión del servidor: 10.4.24-MariaDB - mariadb.org binary distribution
-- SO del servidor: Win64
-- HeidiSQL Versión: 11.3.0.6295
-- --------------------------------------------------------
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET NAMES utf8 */;
/*!50503 SET NAMES utf8mb4 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
-- Volcando estructura de base de datos para cercanias
CREATE DATABASE IF NOT EXISTS `cercanias` /*!40100 DEFAULT CHARACTER SET utf8mb4 */;
USE `cercanias`;
-- Volcando estructura para tabla cercanias.api_status
CREATE TABLE IF NOT EXISTS `api_status` (
`api_status_id` tinyint(4) NOT NULL DEFAULT 0,
`status_code` tinyint(4) DEFAULT NULL,
`ctime` timestamp NULL DEFAULT current_timestamp(),
`utime` timestamp NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
PRIMARY KEY (`api_status_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='0 NO-DATA\r\n1 AVAILABLE\r\n2 UPDATING';
-- La exportación de datos fue deseleccionada.
-- Volcando estructura para tabla cercanias.calendar
CREATE TABLE IF NOT EXISTS `calendar` (
`service_id` varchar(10) NOT NULL,
`monday` bit(1) NOT NULL,
`tuesday` bit(1) NOT NULL,
`wednesday` bit(1) NOT NULL,
`thursday` bit(1) NOT NULL,
`friday` bit(1) NOT NULL,
`saturday` bit(1) NOT NULL,
`sunday` bit(1) NOT NULL,
`start_date` date NOT NULL,
`end_date` date NOT NULL,
PRIMARY KEY (`service_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- La exportación de datos fue deseleccionada.
-- Volcando estructura para tabla cercanias.regions
CREATE TABLE IF NOT EXISTS `regions` (
`region_id` varchar(10) NOT NULL,
`shortName` varchar(20) DEFAULT NULL,
`longName` varchar(40) DEFAULT NULL,
PRIMARY KEY (`region_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- La exportación de datos fue deseleccionada.
-- Volcando estructura para tabla cercanias.routes
CREATE TABLE IF NOT EXISTS `routes` (
`route_id` varchar(10) NOT NULL,
`route_short_name` varchar(8) NOT NULL,
`route_long_name` varchar(80) NOT NULL,
`route_type` tinyint(4) NOT NULL DEFAULT 0,
`route_color` varchar(8) NOT NULL DEFAULT '0',
`route_text_color` varchar(8) NOT NULL DEFAULT '0',
`region_id` varchar(10) NOT NULL DEFAULT '0',
`active` bit(1) NOT NULL DEFAULT b'0',
PRIMARY KEY (`route_id`),
KEY `region_id` (`region_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='route_id\r\nroute_short_name\r\nroute_long_name\r\nroute_type\r\nroute_color\r\nroute_text_color \n';
-- La exportación de datos fue deseleccionada.
-- Volcando estructura para tabla cercanias.stops
CREATE TABLE IF NOT EXISTS `stops` (
`stop_id` varchar(10) NOT NULL DEFAULT '',
`stop_name` varchar(80) NOT NULL,
`stop_lat` decimal(11,7) NOT NULL DEFAULT 0.0000000,
`stop_lon` decimal(11,7) NOT NULL DEFAULT 0.0000000,
PRIMARY KEY (`stop_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- La exportación de datos fue deseleccionada.
-- Volcando estructura para tabla cercanias.stop_times
CREATE TABLE IF NOT EXISTS `stop_times` (
`trip_id` varchar(20) NOT NULL,
`arrival_time` time NOT NULL,
`departure_time` time NOT NULL,
`stop_id` varchar(10) NOT NULL,
`stop_sequence` smallint(6) NOT NULL DEFAULT 0,
KEY `trip_id` (`trip_id`),
KEY `stop_id` (`stop_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- La exportación de datos fue deseleccionada.
-- Volcando estructura para tabla cercanias.trips
CREATE TABLE IF NOT EXISTS `trips` (
`route_id` varchar(10) NOT NULL,
`service_id` varchar(10) NOT NULL,
`trip_id` varchar(20) NOT NULL,
`trip_headsign` varchar(40) DEFAULT NULL,
PRIMARY KEY (`trip_id`),
KEY `route_id` (`route_id`),
KEY `service_id` (`service_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- La exportación de datos fue deseleccionada.
-- Volcando estructura para tabla cercanias.xrequests
CREATE TABLE IF NOT EXISTS `xrequests` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`originip` varchar(45) NOT NULL DEFAULT '',
`ts` timestamp NOT NULL DEFAULT current_timestamp(),
PRIMARY KEY (`id`),
KEY `ts` (`ts`),
KEY `originip` (`originip`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8 COMMENT='Requests from remote IPs';
-- La exportación de datos fue deseleccionada.
/*!40101 SET SQL_MODE=IFNULL(@OLD_SQL_MODE, '') */;
/*!40014 SET FOREIGN_KEY_CHECKS=IFNULL(@OLD_FOREIGN_KEY_CHECKS, 1) */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40111 SET SQL_NOTES=IFNULL(@OLD_SQL_NOTES, 1) */;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment