TimKraemer/trip2excel.py

## trip2excel.py
import json
import sys
from datetime import datetime
import csv
import os
from geopy.geocoders import Nominatim  # pip install geopy


def convert_date(date_str):
    # Parse the ISO 8601 format date string
    date_obj = datetime.fromisoformat(date_str.rstrip("Z"))

    # Format the date into the desired format
    formatted_date = date_obj.strftime("%d.%m.%Y, %H:%M")

    return formatted_date


def generate_csv(directory):

    for filename in os.listdir(directory):
        # we're going to ignore all non-json files
        fna = filename.split(".")
        if len(fna) > 1 and fna[len(fna) - 1] == "json":
            # for every json file...
            with open(os.path.join(directory, filename), 'rb') as jsonFile:
                # parse json
                data = json.load(jsonFile)

                # we're only interested in the activities from our timeline
                activities = list(
                    filter(lambda tl_obj: "activitySegment" in tl_obj, data["timelineObjects"]))
                # specifically we want all "IN_PASSENGER_VEHICLE" activities
                trips = list(
                    filter(lambda activity: activity["activitySegment"]["activityType"] == "IN_PASSENGER_VEHICLE", activities))

                # we're using geopy's Nominatim reverse-location service (which uses openstreetmap's API)
                locator = Nominatim(user_agent="trip2excel")

                # csv first row as header (remove this, if you're only interested in the data)
                csv_data = [("start address", "destination address",
                             "distance", "start time", "destination time")]

                for trip in trips:
                    try:
                        # we're using the Nominatim's raw format, since we're going to format the address later
                        raw = locator.reverse("{}, {}".format(
                            (trip["activitySegment"]["startLocation"]
                             ["latitudeE7"] / 10000000),
                            (trip["activitySegment"]["startLocation"]
                             ["longitudeE7"] / 10000000)
                        )).raw

                        # format will be: building road house_number postcode town/suburb
                        start_address = "{} {} {} {} {}".format(
                            ("{}".format(raw.get("address").get("building")) if raw.get("address").get(
                                "building") else ""),
                            (raw.get("address").get("road")),
                            (raw.get("address").get("house_number")),
                            (raw.get("address").get("postcode")),
                            (raw.get("address").get("town")
                             or raw.get("address").get("suburb"))
                        )
                    except KeyError:
                        # if there is no address for the given gps coordinates
                        print("unknown start_address")
                        start_address = ""

                    try:
                        # same as start_address // FIXME: avoid code-duplication
                        raw = locator.reverse("{}, {}".format(
                            (trip["activitySegment"]["endLocation"]
                             ["latitudeE7"] / 10000000),
                            (trip["activitySegment"]["endLocation"]
                             ["longitudeE7"] / 10000000)
                        )).raw

                        destination_address = "{} {} {} {} {}".format(
                            ("{}".format(raw.get("address").get("building")) if raw.get("address").get(
                                "building") else ""),
                            (raw.get("address").get("road")),
                            (raw.get("address").get("house_number")),
                            (raw.get("address").get("postcode")),
                            (raw.get("address").get("town")
                             or raw.get("address").get("suburb"))
                        )
                    except KeyError:
                        print("unknown destination_address")
                        destination_address = ""

                    try:
                        # render the distance as kilometer
                        distance = "{}km".format(
                            trip["activitySegment"]["distance"] / 1000)
                    except KeyError:
                        print("unknown distance")
                        distance = ""

                    try:
                        # some date formatting - german style...
                        start_time = convert_date(
                            trip["activitySegment"]["duration"]["startTimestamp"])
                    except KeyError:
                        print("unknown start_time")
                        start_time = ""

                    try:
                        destination_time = convert_date(
                            trip["activitySegment"]["duration"]
                                ["endTimestamp"])

                    except KeyError:
                        print("unknown destination_time")
                        destination_time = ""

                    # creating a csv row
                    csv_data.append((
                        start_address,
                        destination_address,
                        distance,
                        start_time,
                        destination_time
                    ))

                    # some status reports for the terminal, since location retrieving takes some time...
                    print(
                        f'Processed trip {len(csv_data) - 1} of {len(trips)} in file {filename}')

                # create .csv file with utf-8 encoding and some M$ Excel defaults...
                with open(os.path.join(sys.argv[1], filename).split('.json')[0] + '.csv', 'w', newline='\n',
                          encoding='utf-8-sig') as csvFile:
                    writer = csv.writer(csvFile, dialect='excel', delimiter=';', quotechar='\'',
                                        quoting=csv.QUOTE_MINIMAL)
                    writer.writerows(csv_data)


if __name__ == '__main__':
    # as a first parameter we're expecting a directory filled with .json files from google's takeout location
    # history archive request it here: https://takeout.google.com/settings/takeout/custom/location_history

    if len(sys.argv) < 2:
        print("Error: No directory specified. Please provide the directory as a parameter.")
        sys.exit(1)

    directory = sys.argv[1]
    if not os.path.isdir(directory):
        print(f"Error: The specified directory '{directory}' does not exist.")
        sys.exit(1)

    generate_csv(directory)
	import json
	import sys
	from datetime import datetime
	import csv
	import os
	from geopy.geocoders import Nominatim # pip install geopy


	def convert_date(date_str):
	# Parse the ISO 8601 format date string
	date_obj = datetime.fromisoformat(date_str.rstrip("Z"))

	# Format the date into the desired format
	formatted_date = date_obj.strftime("%d.%m.%Y, %H:%M")

	return formatted_date


	def generate_csv(directory):

	for filename in os.listdir(directory):
	# we're going to ignore all non-json files
	fna = filename.split(".")
	if len(fna) > 1 and fna[len(fna) - 1] == "json":
	# for every json file...
	with open(os.path.join(directory, filename), 'rb') as jsonFile:
	# parse json
	data = json.load(jsonFile)

	# we're only interested in the activities from our timeline
	activities = list(
	filter(lambda tl_obj: "activitySegment" in tl_obj, data["timelineObjects"]))
	# specifically we want all "IN_PASSENGER_VEHICLE" activities
	trips = list(
	filter(lambda activity: activity["activitySegment"]["activityType"] == "IN_PASSENGER_VEHICLE", activities))

	# we're using geopy's Nominatim reverse-location service (which uses openstreetmap's API)
	locator = Nominatim(user_agent="trip2excel")

	# csv first row as header (remove this, if you're only interested in the data)
	csv_data = [("start address", "destination address",
	"distance", "start time", "destination time")]

	for trip in trips:
	try:
	# we're using the Nominatim's raw format, since we're going to format the address later
	raw = locator.reverse("{}, {}".format(
	(trip["activitySegment"]["startLocation"]
	["latitudeE7"] / 10000000),
	(trip["activitySegment"]["startLocation"]
	["longitudeE7"] / 10000000)
	)).raw

	# format will be: building road house_number postcode town/suburb
	start_address = "{} {} {} {} {}".format(
	("{}".format(raw.get("address").get("building")) if raw.get("address").get(
	"building") else ""),
	(raw.get("address").get("road")),
	(raw.get("address").get("house_number")),
	(raw.get("address").get("postcode")),
	(raw.get("address").get("town")
	or raw.get("address").get("suburb"))
	)
	except KeyError:
	# if there is no address for the given gps coordinates
	print("unknown start_address")
	start_address = ""

	try:
	# same as start_address // FIXME: avoid code-duplication
	raw = locator.reverse("{}, {}".format(
	(trip["activitySegment"]["endLocation"]
	["latitudeE7"] / 10000000),
	(trip["activitySegment"]["endLocation"]
	["longitudeE7"] / 10000000)
	)).raw

	destination_address = "{} {} {} {} {}".format(
	("{}".format(raw.get("address").get("building")) if raw.get("address").get(
	"building") else ""),
	(raw.get("address").get("road")),
	(raw.get("address").get("house_number")),
	(raw.get("address").get("postcode")),
	(raw.get("address").get("town")
	or raw.get("address").get("suburb"))
	)
	except KeyError:
	print("unknown destination_address")
	destination_address = ""

	try:
	# render the distance as kilometer
	distance = "{}km".format(
	trip["activitySegment"]["distance"] / 1000)
	except KeyError:
	print("unknown distance")
	distance = ""

	try:
	# some date formatting - german style...
	start_time = convert_date(
	trip["activitySegment"]["duration"]["startTimestamp"])
	except KeyError:
	print("unknown start_time")
	start_time = ""

	try:
	destination_time = convert_date(
	trip["activitySegment"]["duration"]
	["endTimestamp"])

	except KeyError:
	print("unknown destination_time")
	destination_time = ""

	# creating a csv row
	csv_data.append((
	start_address,
	destination_address,
	distance,
	start_time,
	destination_time
	))

	# some status reports for the terminal, since location retrieving takes some time...
	print(
	f'Processed trip {len(csv_data) - 1} of {len(trips)} in file {filename}')

	# create .csv file with utf-8 encoding and some M$ Excel defaults...
	with open(os.path.join(sys.argv[1], filename).split('.json')[0] + '.csv', 'w', newline='\n',
	encoding='utf-8-sig') as csvFile:
	writer = csv.writer(csvFile, dialect='excel', delimiter=';', quotechar='\'',
	quoting=csv.QUOTE_MINIMAL)
	writer.writerows(csv_data)


	if __name__ == '__main__':
	# as a first parameter we're expecting a directory filled with .json files from google's takeout location
	# history archive request it here: https://takeout.google.com/settings/takeout/custom/location_history

	if len(sys.argv) < 2:
	print("Error: No directory specified. Please provide the directory as a parameter.")
	sys.exit(1)

	directory = sys.argv[1]
	if not os.path.isdir(directory):
	print(f"Error: The specified directory '{directory}' does not exist.")
	sys.exit(1)

	generate_csv(directory)