Last active
May 25, 2024 10:20
-
-
Save dacr/0b8e064aacef1bd69f41380afe9f0592 to your computer and use it in GitHub Desktop.
Photos model experiments - When you have ~89000 photos/videos tooling is mandatory / published by https://github.com/dacr/code-examples-manager #6ec702df-15ff-4cd8-83a4-626d8f017649/cdb0e37c503f8fd429dd796a4bec573a0151a473
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// summary : Photos model experiments - When you have ~89000 photos/videos tooling is mandatory | |
// keywords : scala, photos, memories, poc | |
// publish : gist | |
// authors : David Crosson | |
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2) | |
// id : 6ec702df-15ff-4cd8-83a4-626d8f017649 | |
// created-on : 2023-07-16T20:15:00+01:00 | |
// managed-by : https://github.com/dacr/code-examples-manager | |
// run-with : scala-cli $file | |
// --------------------- | |
//> using scala "3.4.2" | |
//> using objectWrapper | |
//> using dep "com.drewnoakes:metadata-extractor:2.18.0" | |
//> using dep "com.fasterxml.uuid:java-uuid-generator:4.2.0" | |
//---------------------- | |
import java.io.{File, IOException} | |
import java.nio.charset.Charset | |
import java.nio.file.attribute.BasicFileAttributes | |
import java.nio.file.{Files, Path, Paths} | |
import java.time.{Instant, OffsetDateTime, ZoneId, ZoneOffset, ZonedDateTime} | |
import java.util.UUID | |
import scala.util.matching.Regex | |
import scala.util.{Either, Failure, Left, Properties, Right, Success, Try} | |
import java.time.format.DateTimeFormatter.ISO_DATE_TIME | |
import java.time.temporal.ChronoField | |
import scala.jdk.CollectionConverters.* | |
import scala.Console.{BLUE, GREEN, RED, RESET, YELLOW} | |
import com.fasterxml.uuid.Generators | |
import scala.annotation.targetName | |
// ===================================================================================================================== | |
/** Altitude relative to mean sea level, carried as a plain `Double`
  * (unit is not stated in this file — presumably metres; TODO confirm).
  *
  * See https://en.wikipedia.org/wiki/Sea_level
  */
type AltitudeMeanSeaLevel = Double
import DecimalDegrees.* | |
import DegreeMinuteSeconds.* | |
/** Geographic coordinates expressed in decimal degrees.
  *
  * Latitude and longitude are two distinct opaque aliases of `Double`: outside of this object
  * they cannot be confused with each other (or with a raw `Double`), while sharing the same
  * runtime representation.
  *
  * See https://en.wikipedia.org/wiki/Decimal_degrees
  */
object DecimalDegrees {
  opaque type LatitudeDecimalDegrees  = Double
  opaque type LongitudeDecimalDegrees = Double

  object LatitudeDecimalDegrees {
    /** Wraps a raw value as a latitude. No range validation is performed. */
    def apply(value: Double): LatitudeDecimalDegrees = value
  }

  object LongitudeDecimalDegrees {
    /** Wraps a raw value as a longitude. No range validation is performed. */
    def apply(value: Double): LongitudeDecimalDegrees = value
  }

  // The conversion works on whole milli-arc-seconds so that rounding can never produce
  // an out-of-range "60.000" seconds or "60" minutes component.
  private val MilliArcSecondsPerDegree = 3_600_000L
  private val MilliArcSecondsPerMinute = 60_000L

  /** Renders a decimal-degrees value as `D° M′ S.sss″ R`.
    *
    * @param value       the signed decimal-degrees value
    * @param positiveRef hemisphere letter used when `value` is zero or positive
    * @param negativeRef hemisphere letter used when `value` is negative
    * @return the textual form, built from the absolute value rounded to 1/1000 arc-second
    * @throws IllegalArgumentException when `value` is NaN or infinite
    */
  private def toDmsText(value: Double, positiveRef: Char, negativeRef: Char): String = {
    require(!value.isNaN && !value.isInfinite, s"cannot convert $value to degrees/minutes/seconds")
    val ref          = if (value < 0d) negativeRef else positiveRef
    val total        = math.round(math.abs(value) * MilliArcSecondsPerDegree)
    val degrees      = total / MilliArcSecondsPerDegree
    val minutes      = (total % MilliArcSecondsPerDegree) / MilliArcSecondsPerMinute
    val milliSeconds = total % MilliArcSecondsPerMinute
    // Locale.ROOT keeps the digits and the decimal separator independent of the JVM default locale.
    "%d° %d′ %d.%03d″ %s".formatLocal(
      java.util.Locale.ROOT,
      degrees,
      minutes,
      milliSeconds / 1000L,
      milliSeconds % 1000L,
      ref
    )
  }

  extension (dd: LatitudeDecimalDegrees) {
    /** Converts this latitude to its degrees/minutes/seconds form, using `N` for zero or
      * positive values and `S` for negative ones.
      *
      * @throws IllegalArgumentException when the value is NaN or infinite
      */
    @targetName("toDegreeMinuteSeconds_latitude")
    def toDegreeMinuteSeconds: LatitudeDegreeMinuteSeconds =
      // The generated text follows the D° M′ S.sss″ R layout expected by the DMS constructor,
      // so a Failure here would be a programming error rather than a data error.
      LatitudeDegreeMinuteSeconds(toDmsText(dd, 'N', 'S')).get
  }

  extension (dd: LongitudeDecimalDegrees) {
    /** Converts this longitude to its degrees/minutes/seconds form, using `E` for zero or
      * positive values and `W` for negative ones.
      *
      * @throws IllegalArgumentException when the value is NaN or infinite
      */
    @targetName("toDegreeMinuteSeconds_longitude")
    def toDegreeMinuteSeconds: LongitudeDegreeMinuteSeconds =
      // Same reasoning as for latitude: the text is valid by construction.
      LongitudeDegreeMinuteSeconds(toDmsText(dd, 'E', 'W')).get
  }
}
/** Geographic coordinates expressed as degrees / minutes / seconds text such as `38° 53′ 23″ N`.
  *
  * Latitude and longitude are opaque aliases of `String`. Their only constructors validate the
  * text against [[latitudeDmsRE]] / [[longitudeDmsRE]], which is what makes the pattern matches
  * in the `toDecimalDegrees` extensions safe.
  */
object DegreeMinuteSeconds {
  /** Full-string pattern for a latitude: degrees, minutes, seconds (with `.` or `,` as decimal
    * separator) and a trailing `N` or `S`.
    * NOTE(review): an optional leading `+`/`-` is accepted but not captured, so it has no effect
    * on the converted value — confirm whether that is intended.
    */
  val latitudeDmsRE: Regex =
    """[-+]?(\d+)°\s*(\d+)['′]\s*(\d+(?:[.,]\d+)?)(?:(?:")|(?:'')|(?:′′)|(?:″))\s+([NS])""".r

  /** Same as [[latitudeDmsRE]] but with a trailing `E` or `W`. */
  val longitudeDmsRE: Regex =
    """[-+]?(\d+)°\s*(\d+)['′]\s*(\d+(?:[.,]\d+)?)(?:(?:")|(?:'')|(?:′′)|(?:″))\s+([EW])""".r

  /** Turns the captured groups of a DMS pattern into a signed decimal-degrees value.
    *
    * @param d   degrees digits
    * @param m   minutes digits (not range-checked)
    * @param s   seconds digits, `.` or `,` accepted as decimal separator (not range-checked)
    * @param ref hemisphere letter; `N` and `E` are positive, anything else is negative
    */
  private def convert(d: String, m: String, s: String, ref: String): Double = {
    val sign      = if ("NE".contains(ref.toUpperCase)) 1d else -1d
    val magnitude = d.toDouble + m.toDouble / 60d + s.replace(',', '.').toDouble / 3600d
    // The hemisphere sign applies to the whole magnitude: negating only the degrees would
    // move southern / western coordinates towards zero by twice their minutes and seconds.
    sign * magnitude
  }

  opaque type LatitudeDegreeMinuteSeconds  = String
  opaque type LongitudeDegreeMinuteSeconds = String

  object LatitudeDegreeMinuteSeconds {
    /** Validates `dmsSpec` as a latitude.
      *
      * @return `Success` wrapping the unchanged text when it fully matches [[latitudeDmsRE]],
      *         otherwise (including for `null`) a `Failure` holding an `IllegalArgumentException`
      */
    def apply(dmsSpec: String): Try[LatitudeDegreeMinuteSeconds] =
      if (dmsSpec != null && DegreeMinuteSeconds.latitudeDmsRE.matches(dmsSpec)) Success(dmsSpec)
      else Failure(IllegalArgumentException("given DegreeMinuteSeconds latitude string is invalid"))
  }

  object LongitudeDegreeMinuteSeconds {
    /** Validates `dmsSpec` as a longitude.
      *
      * @return `Success` wrapping the unchanged text when it fully matches [[longitudeDmsRE]],
      *         otherwise (including for `null`) a `Failure` holding an `IllegalArgumentException`
      */
    def apply(dmsSpec: String): Try[LongitudeDegreeMinuteSeconds] =
      if (dmsSpec != null && DegreeMinuteSeconds.longitudeDmsRE.matches(dmsSpec)) Success(dmsSpec)
      else Failure(IllegalArgumentException("given DegreeMinuteSeconds longitude string is invalid"))
  }

  extension (dms: LatitudeDegreeMinuteSeconds) {
    /** Converts this latitude to signed decimal degrees (south is negative). */
    @targetName("toDecimalDegrees_latitude")
    def toDecimalDegrees: LatitudeDecimalDegrees = dms match {
      // Always matches: the constructor only accepts text that satisfies this same pattern.
      case DegreeMinuteSeconds.latitudeDmsRE(d, m, s, ref) =>
        LatitudeDecimalDegrees(convert(d, m, s, ref))
    }
  }

  extension (dms: LongitudeDegreeMinuteSeconds) {
    /** Converts this longitude to signed decimal degrees (west is negative). */
    @targetName("toDecimalDegrees_longitude")
    def toDecimalDegrees: LongitudeDecimalDegrees = dms match {
      // Always matches: the constructor only accepts text that satisfies this same pattern.
      case DegreeMinuteSeconds.longitudeDmsRE(d, m, s, ref) =>
        LongitudeDecimalDegrees(convert(d, m, s, ref))
    }
  }
}
/** A geographic position.
  *
  * @param latitude  latitude in decimal degrees
  * @param longitude longitude in decimal degrees
  * @param altitude  altitude relative to mean sea level
  */
case class GeoPoint(latitude: LatitudeDecimalDegrees, longitude: LongitudeDecimalDegrees, altitude: AltitudeMeanSeaLevel)
object GeoPoint {

  /** Builds a [[GeoPoint]] from degrees/minutes/seconds coordinates by converting both of them
    * to decimal degrees; the altitude is passed through unchanged.
    */
  def apply(
    latitudeDMS: LatitudeDegreeMinuteSeconds,
    longitudeDMS: LongitudeDegreeMinuteSeconds,
    altitudeMeanSeaLevel: AltitudeMeanSeaLevel
  ): GeoPoint = {
    val latitude: LatitudeDecimalDegrees   = latitudeDMS.toDecimalDegrees
    val longitude: LongitudeDecimalDegrees = longitudeDMS.toDecimalDegrees
    GeoPoint(latitude, longitude, altitudeMeanSeaLevel)
  }
}
/** Size of a two-dimensional area.
  *
  * @param width  horizontal extent (unit not stated here — presumably pixels; TODO confirm)
  * @param height vertical extent, in the same unit as `width`
  */
case class Dimension2D(width: Int, height: Int)
/** A rectangular region described by an anchor point and a size.
  *
  * @param x         horizontal coordinate of the anchor (which corner it denotes is not stated
  *                  here — presumably top-left; TODO confirm)
  * @param y         vertical coordinate of the anchor
  * @param dimension width and height of the region
  */
case class BoundingBox(x: Int, y: Int, dimension: Dimension2D)
/** Identifier of a [[Someone]], wrapping a UUID as a value class. */
case class SomeoneId(uuid: UUID) extends AnyVal
/** Identifier of the owner of a photo, wrapping a UUID as a value class. */
case class PhotoOwnerId(uuid: UUID) extends AnyVal
/** Identifier of a photo, wrapping a UUID as a value class. */
case class PhotoId(uuid: UUID) extends AnyVal
/** Hash of a photo's content, kept as text
  * (algorithm and encoding are not specified here — TODO confirm).
  */
case class PhotoHash(code: String) extends AnyVal
/** A single keyword attached to a photo. */
case class PhotoKeyword(text: String) extends AnyVal
/** The category a photo belongs to, as free text. */
case class PhotoCategory(text: String) extends AnyVal
/** A known person.
  *
  * @param id        identifier of this person
  * @param firstName first name
  * @param lastName  last name
  * @param birthDate birth date when known
  */
case class Someone(id: SomeoneId, firstName: String, lastName: String, birthDate: Option[OffsetDateTime])
/** A classification label detected for a photo. */
case class DetectedClassification(name: String)
/** An object detected within a photo.
  *
  * @param name label of the detected object
  * @param box  region associated with the detection
  */
case class DetectedObject(name: String, box: BoundingBox)
/** A face detected within a photo.
  *
  * @param someoneId the person this face is associated with, when there is one
  * @param box       region associated with the detection
  */
case class DetectedFace(someoneId: Option[SomeoneId], box: BoundingBox)
/** Where a photo comes from. A file is the only kind of source modelled so far. */
enum PhotoSource {

  /** A photo stored as a file.
    *
    * @param path         location of the file, as text
    * @param size         size of the file (presumably in bytes; TODO confirm)
    * @param hash         hash of the file
    * @param lastModified last modification date-time of the file
    */
  case PhotoFile(path: String, size: Long, hash: PhotoHash, lastModified: OffsetDateTime)
}
/** Orientation of a photo as a numeric code
  * (presumably the EXIF orientation value; TODO confirm).
  */
case class PhotoOrientation(code: Int)
/** Metadata attached to a photo.
  *
  * @param dimension     size of the photo
  * @param shootDateTime when the photo was shot, when known
  * @param orientation   orientation of the photo, when known
  * @param cameraName    name of the camera, when known
  * @param tags          metadata entries as key/value text pairs
  * @param lastUpdated   date-time of the last update of this record
  */
case class PhotoMetaData(
  dimension: Dimension2D,
  shootDateTime: Option[OffsetDateTime],
  orientation: Option[PhotoOrientation],
  cameraName: Option[String],
  tags: Map[String, String],
  lastUpdated: OffsetDateTime
)
/** The keywords of a photo, together with the date-time they were last updated. */
case class PhotoKeywords(keywords: List[PhotoKeyword], lastUpdated: OffsetDateTime)
/** The classifications detected for a photo, together with the date-time they were last updated. */
case class PhotoClassifications(classifications: List[DetectedClassification], lastUpdated: OffsetDateTime)
/** The objects detected in a photo, together with the date-time they were last updated. */
case class PhotoObjects(objects: List[DetectedObject], lastUpdated: OffsetDateTime)
/** The faces detected in a photo, together with the date-time they were last updated. */
case class PhotoFaces(faces: List[DetectedFace], lastUpdated: OffsetDateTime)
/** Where a miniature is stored. A file is the only kind of source modelled so far. */
enum MiniatureSource {

  /** A miniature stored as a file.
    *
    * @param path      location of the file, as text
    * @param dimension size of the miniature
    */
  case MiniatureFile(path: String, dimension: Dimension2D)
}
/** The miniatures available for a photo, together with the date-time they were last updated. */
case class Miniatures(sources: List[MiniatureSource], lastUpdated: OffsetDateTime)
/** A photo and everything known about it. Only identity, owner, timestamp and source are
  * mandatory; every other part is optional.
  *
  * @param id                   identifier of the photo
  * @param ownerId              identifier of the photo owner
  * @param timestamp            reference date-time of the photo
  * @param source               where the photo comes from
  * @param miniatures           miniatures, when available
  * @param metaData             metadata, when available
  * @param foundPlace           geographic position, when found
  * @param foundCategory        category, when found
  * @param foundKeywords        keywords, when found
  * @param foundClassifications detected classifications, when found
  * @param foundObjects         detected objects, when found
  * @param foundFaces           detected faces, when found
  */
case class Photo(
  id: PhotoId,
  ownerId: PhotoOwnerId,
  timestamp: OffsetDateTime,
  source: PhotoSource,
  miniatures: Option[Miniatures],
  metaData: Option[PhotoMetaData],
  foundPlace: Option[GeoPoint],
  foundCategory: Option[PhotoCategory],
  foundKeywords: Option[PhotoKeywords],
  foundClassifications: Option[PhotoClassifications],
  foundObjects: Option[PhotoObjects],
  foundFaces: Option[PhotoFaces]
)
// ===================================================================================================================== | |
/** Name-based UUID generator shared by every call to [[makePUUID]]. */
val generatorPUUID = Generators.nameBasedGenerator()

/** Attempts to generate a unique photo identifier.
  *
  * Only `filePath` contributes to the result at the moment; `camera`, `shootDateTime` and
  * `fileHash` are accepted but not used. A variant that was left disabled in this file derived
  * the name from the file name plus the shoot date-time instead.
  *
  * @param camera        camera name, when known (currently ignored)
  * @param shootDateTime shoot instant, when known (currently ignored)
  * @param filePath      path of the photo file; its text form is the name given to the generator
  * @param fileHash      hash of the photo file (currently ignored)
  * @return the UUID produced by [[generatorPUUID]] for the text form of `filePath`
  */
def makePUUID(camera: Option[String], shootDateTime: Option[Instant], filePath: Path, fileHash: String): UUID = {
  val name = filePath.toString
  generatorPUUID.generate(name)
}
/* | |
object Photo { | |
implicit val pathEncoder: JsonEncoder[Path] = JsonEncoder[String].contramap(p => p.toString) | |
implicit val pathDecoder: JsonDecoder[Path] = JsonDecoder[String].map(p => Path.of(p)) | |
def makeTagKey(tag: com.drew.metadata.Tag): String = { | |
val prefix = tag.getDirectoryName().trim.replaceAll("""\s+""", "") | |
val name = tag.getTagName().trim.replaceAll("""\s+""", "") | |
val key = s"$prefix$name" | |
key.head.toLower + key.tail | |
} | |
def tagsToMap(tags: List[com.drew.metadata.Tag]): Map[String, String] = { | |
tags | |
.filterNot(_.getDescription == null) | |
.map(tag => makeTagKey(tag) -> tag.getDescription) | |
.toMap | |
} | |
def now = OffsetDateTime.now() // TODO : migrate to ZIO Clock.now | |
def checkTimestampValid(ts: OffsetDateTime) = ts.get(ChronoField.YEAR) >= 2000 & ts.isBefore(now) | |
def computeTimestamp(mayBeShootDateTime: Option[OffsetDateTime], fileLastUpdated: OffsetDateTime): OffsetDateTime = | |
mayBeShootDateTime match | |
case Some(shootDateTime) if checkTimestampValid(shootDateTime) => shootDateTime | |
case _ => fileLastUpdated | |
def makePhoto( | |
uuid: UUID, | |
filePath: Path, | |
fileSize: Long, | |
fileHash: String, | |
fileLastUpdated: Instant, | |
category: Option[String], | |
shootDateTime: Option[Instant], | |
camera: Option[String], | |
metaDataTags: List[com.drew.metadata.Tag], | |
keywords: List[String], // Extracted from category | |
classifications: List[String], // Extracted from AI DJL | |
detectedObjects: List[String] // Extracted from AI DJL | |
): Photo = { | |
val shootOffsetDateTime = shootDateTime.map(_.atOffset(ZoneOffset.UTC)) | |
val fileLastUpdatedOffsetDateTime = fileLastUpdated.atOffset(ZoneOffset.UTC) | |
val tags = tagsToMap(metaDataTags) | |
Photo( | |
uuid = uuid, | |
timestamp = computeTimestamp(shootOffsetDateTime, fileLastUpdatedOffsetDateTime), | |
filePath = filePath, | |
fileSize = fileSize, | |
fileHash = fileHash, | |
fileLastUpdated = fileLastUpdatedOffsetDateTime, | |
category = category, | |
shootDateTime = shootOffsetDateTime, | |
camera = camera, | |
tags = tags, | |
keywords = keywords, | |
classifications = classifications, | |
detectedObjects = detectedObjects, | |
place = computeGeoPoint(tags) | |
) | |
} | |
} | |
*/ | |
// Demo: parse a latitude and a longitude given as degrees/minutes/seconds text and print
// both as decimal degrees. Nothing is printed when either text is rejected, because
// `foreach` on a failed `Try` does nothing.
LatitudeDegreeMinuteSeconds("38° 53′ 23″ N").foreach { lat =>
  LongitudeDegreeMinuteSeconds("77° 00′ 32″ W").foreach { lon =>
    println(lat.toDecimalDegrees)
    println(lon.toDecimalDegrees)
  }
}
// END OF THIS EXPERIMENT TIME TO SWITCH TO A REAL WORLD PROJECT |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment