Skip to content

Instantly share code, notes, and snippets.

@a1678991
Last active January 21, 2024 11:00
Show Gist options
  • Save a1678991/d4313c11fa9204f56474f5613a2fa461 to your computer and use it in GitHub Desktop.
Save a1678991/d4313c11fa9204f56474f5613a2fa461 to your computer and use it in GitHub Desktop.
import pandas as pd
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import argparse
from typing import List, Dict, Optional
def parse_kml(file_path: str) -> ET.Element:
    """
    Load a KML document from disk and hand back its top-level element.
    :param file_path: Location of the KML file to read.
    :return: The root element of the parsed XML tree.
    """
    return ET.parse(file_path).getroot()
def _local_name(tag: object) -> str:
    """Return an XML tag with any ``{namespace}`` prefix removed ('' for non-string tags)."""
    if not isinstance(tag, str):
        return ''
    return tag.rsplit('}', 1)[-1]


def _find_child(parent: ET.Element, local_name: str) -> Optional[ET.Element]:
    """Return the first direct child of *parent* whose local tag name equals *local_name*."""
    for candidate in parent:
        if _local_name(candidate.tag) == local_name:
            return candidate
    return None


def extract_data_from_placemark(placemark: ET.Element) -> Dict[str, str]:
    """
    Extracts data from a Placemark element into a dictionary.

    Three kinds of direct children are read:

    * ``name``         -> stored under ``'name'`` (``None`` if the element is empty),
    * ``ExtendedData`` -> each ``Data`` child that has a ``value`` child is stored
      under the ``Data`` element's ``name`` attribute,
    * ``Point``        -> the stripped text of its ``coordinates`` child is stored
      under ``'coordinates'``.

    Tags are compared by local name, so the lookup does not depend on which KML
    namespace URI the document declares.
    :param placemark: XML Element representing a Placemark.
    :return: Dictionary with extracted data.
    """
    placemark_data: Dict[str, str] = {}
    for child in placemark:
        tag = _local_name(child.tag)
        if tag == 'name':
            placemark_data['name'] = child.text
        elif tag == 'ExtendedData':
            for data in child:
                if _local_name(data.tag) != 'Data':
                    continue
                key = data.attrib.get('name')
                value = _find_child(data, 'value')
                if value is not None:
                    placemark_data[key] = value.text
        elif tag == 'Point':
            coordinates = _find_child(child, 'coordinates')
            # An empty <coordinates/> element has text None; skip it rather
            # than crash on None.strip().
            if coordinates is not None and coordinates.text is not None:
                placemark_data['coordinates'] = coordinates.text.strip()
    return placemark_data
def process_data(data: List[Dict[str, str]], interval_seconds: float = 1.0) -> pd.DataFrame:
    """
    Processes a list of dictionaries into a pandas DataFrame and calculates additional fields.

    Columns added to the frame:

    * ``speed_kmh``          - number preceding ``" km/h"`` in the ``name`` field,
    * ``distance_per_point`` - kilometres covered during one sampling interval,
    * ``total_distance_km``  - running sum of ``distance_per_point``,
    * ``latitude`` / ``longitude`` - second / first comma-separated component of
      the ``coordinates`` field,
    * ``timestamp``          - ``TIME`` field parsed as ``%Y.%m.%d_%H.%M.%S``.
    :param data: List of dictionaries containing Placemark data. Each entry is
        expected to provide ``name``, ``coordinates`` and ``TIME`` keys.
    :param interval_seconds: Time between consecutive points, in seconds. The
        default of 1 second reproduces the previous fixed ``/ 60 / 60`` scaling.
    :return: DataFrame with processed data.
    """
    df = pd.DataFrame(data)
    # Accept an optional fractional part so "12.5 km/h" is read as 12.5 rather
    # than 5; expand=False yields a Series instead of a one-column DataFrame.
    df['speed_kmh'] = df['name'].str.extract(r'(\d+(?:\.\d+)?) km/h', expand=False).astype(float)
    # km/h divided by 3600 is km per second; scale by the sampling interval.
    df['distance_per_point'] = df['speed_kmh'] / 3600.0 * interval_seconds
    df['total_distance_km'] = df['distance_per_point'].cumsum()
    # The coordinates string is split on commas: component 0 is used as the
    # longitude and component 1 as the latitude.
    coordinates = df['coordinates'].str.split(',', expand=True)
    df['latitude'] = coordinates[1].astype(float)
    df['longitude'] = coordinates[0].astype(float)
    df['timestamp'] = pd.to_datetime(df['TIME'], format='%Y.%m.%d_%H.%M.%S')
    return df
def filter_data_by_time(df: pd.DataFrame, start_time: Optional[str] = None, end_time: Optional[str] = None) -> pd.DataFrame:
    """
    Restrict the rows of a DataFrame to an optional, inclusive time window.

    Each bound is applied only when a non-empty value is supplied; with no
    bounds the input frame is returned unchanged.
    :param df: DataFrame with a ``timestamp`` column to compare against.
    :param start_time: Earliest timestamp to keep (inclusive), or None/empty.
    :param end_time: Latest timestamp to keep (inclusive), or None/empty.
    :return: The rows of ``df`` that fall inside the requested window.
    """
    lower_bound = pd.Timestamp(start_time) if start_time else None
    upper_bound = pd.Timestamp(end_time) if end_time else None

    selected = df
    if lower_bound is not None:
        selected = selected[selected['timestamp'] >= lower_bound]
    if upper_bound is not None:
        selected = selected[selected['timestamp'] <= upper_bound]
    return selected
def plot_data(df: pd.DataFrame, title: str, save_path: Optional[str]) -> None:
    """
    Plots the speed (as moving average) and total distance over time.

    Speed is smoothed with a 3-point rolling mean and drawn on the left axis;
    the ``total_distance_km`` column is drawn on a twin right axis. The figure
    is optionally written to disk and then shown.
    :param df: DataFrame containing ``timestamp``, ``speed_kmh`` and
        ``total_distance_km`` columns. It is not modified.
    :param title: Title of the plot.
    :param save_path: File path to save the plot as PNG; nothing is saved when
        this is None or empty.
    """
    # Keep the smoothed series local: writing it back as a new column would
    # mutate the caller's frame and raises SettingWithCopyWarning when ``df``
    # is a filtered slice of another frame.
    speed_moving_avg = df['speed_kmh'].rolling(window=3).mean()

    fig, ax1 = plt.subplots(figsize=(12, 6))
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Speed (km/h) - Moving Average', color='tab:red')
    ax1.plot(df['timestamp'], speed_moving_avg, color='tab:red', label='Speed (Moving Avg)')
    ax1.tick_params(axis='y', labelcolor='tab:red')
    ax1.legend(loc='upper left')

    # Second y-axis sharing the same x-axis for the cumulative distance.
    ax2 = ax1.twinx()
    ax2.set_ylabel('Total Distance (km)', color='tab:green')
    ax2.plot(df['timestamp'], df['total_distance_km'], color='tab:green', linestyle=':', label='Total Distance')
    ax2.tick_params(axis='y', labelcolor='tab:green')
    ax2.legend(loc='upper right')

    plt.title(title)
    plt.tight_layout()
    # Save before show(): the figure may no longer be available afterwards.
    if save_path:
        plt.savefig(save_path, format='png')
    plt.show()
def main(file_path: str, start_time: Optional[str], end_time: Optional[str], title: str, save_path: Optional[str]) -> None:
    """
    Main function to process and plot data from a KML file.

    Parses the file, extracts every Placemark, builds the processed DataFrame,
    applies the optional time window and plots the result.
    :param file_path: Path to the KML file.
    :param start_time: Start of the time range for analysis.
    :param end_time: End of the time range for analysis.
    :param title: Title of the plot.
    :param save_path: File path to save the plot as PNG, or None to skip saving.
    :raises ValueError: If the file contains no Placemark elements.
    """
    root = parse_kml(file_path)
    # Match Placemark by local tag name so the lookup does not depend on the
    # namespace URI declared by the document.
    placemarks = [
        element
        for element in root.iter()
        if isinstance(element.tag, str) and element.tag.rsplit('}', 1)[-1] == 'Placemark'
    ]
    if not placemarks:
        # Fail with a clear message instead of an opaque KeyError raised later
        # from an empty DataFrame.
        raise ValueError(f"No Placemark elements found in {file_path!r}")
    data = [extract_data_from_placemark(placemark) for placemark in placemarks]
    df = process_data(data)
    df_filtered = filter_data_by_time(df, start_time, end_time)
    plot_data(df_filtered, title, save_path)
if __name__ == "__main__":
    # Command-line entry point: one positional KML path plus optional string
    # flags, all forwarded to main().
    parser = argparse.ArgumentParser(
        description='Process and plot data from a KML file.',
    )
    parser.add_argument('file_path', type=str, help='Path to the KML file.')

    # The optional flags all share the same shape, so declare them as
    # (flag, help text, default) rows and register them in order.
    optional_flags = (
        ('--start_time', 'Start of the time range for analysis (optional).', None),
        ('--end_time', 'End of the time range for analysis (optional).', None),
        ('--title', 'Title for the plot (optional).', ""),
        ('--save', 'Path to save the plot as PNG (optional).', None),
    )
    for flag, help_text, default_value in optional_flags:
        parser.add_argument(flag, type=str, help=help_text, default=default_value)

    args = parser.parse_args()
    main(
        args.file_path,
        args.start_time,
        args.end_time,
        args.title,
        args.save,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment