Last active
May 4, 2023 00:12
-
-
Save doriantaylor/b3de78c2d4edb05ef5655b998880d139 to your computer and use it in GitHub Desktop.
Ruby script to turn a Zoom chat into SIOC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'rdf' | |
require 'rdf/vocab' | |
require 'rdf/turtle' | |
require 'pathname' | |
require 'uuidtools' | |
require 'date' | |
# Turns saved Zoom chat transcripts into RDF statements (SIOC, FOAF and
# Dublin Core terms) collected in an in-memory RDF::Repository.
#
# Typical use:
#
#   parser = ZoomChatParser.new(name: 'Weekly sync').run('chat.txt')
#   puts parser.repo.dump(:turtle, prefixes: ZoomChatParser::PREFIXES)
class ZoomChatParser
  # One transcript entry. Captures, in order: the time of day, the sender,
  # the recipient and the message text. The header line must be terminated
  # by "\r\n" and the message line must start with a tab.
  RAW = /\G(\d{1,2}:\d\d:\d\d)\s+From\s+(.*(?= to )) to ((?:\S| )+):\r\n\t([^\r]*?)\n(?=^\d|$)/

  # Recognises the three special message forms. Captures come in pairs:
  # [quoted text, reaction], [quoted text, reply text],
  # [removed reaction, quoted text].
  REP = /\A(?:Reacted to "(.*)" with (.)\s*|Replying to "(.*)"\s+(.*)|Removed a (.) reaction from "(.*)"\s*)/mu

  # Vocabulary shorthands used throughout the class.
  DCT   = RDF::Vocab::DC
  FOAF  = RDF::Vocab::FOAF
  SIOC  = RDF::Vocab::SIOC
  SIOCT = RDF::Vocab::SiocTypes

  # Prefix map intended to be handed to a serializer (e.g. the Turtle writer).
  PREFIXES = {
    rdf:   RDF::RDFV,
    dct:   DCT,
    foaf:  FOAF,
    sioc:  SIOC,
    sioct: SIOCT,
    xsd:   RDF::XSD,
  }

  # @return [RDF::Repository] every statement generated so far
  attr_reader :repo

  # @param channel [RDF::URI, nil] identifier for the chat channel; a random
  #   UUID URN is minted when omitted
  # @param name [String, nil] optional title recorded for the channel
  # @param default [#to_s, nil] name of the default recipient ('Everyone'
  #   when omitted); it is stored but not otherwise consulted by this class
  def initialize(channel: nil, name: nil, default: nil)
    @repo    = RDF::Repository.new
    @people  = {}
    @default = (default || 'Everyone').to_s.freeze
    @channel = channel || uuid_urn

    @repo << [@channel, RDF.type, SIOCT.ChatChannel]
    # Use the class-level DCT alias (RDF::Vocab::DC) like every other
    # Dublin Core reference here, instead of `RDF::Vocab::DCT`.
    @repo << [@channel, DCT.title, RDF::Literal(name)] if name
  end

  # Reads and parses each given transcript file, adding its statements to
  # the repository.
  #
  # @param files [Array<String, Pathname, Array>] paths, possibly nested;
  #   duplicates are dropped before the paths are expanded
  # @return [ZoomChatParser] self, so calls can be chained
  def run(*files)
    paths = files.flatten.uniq.map { |file| Pathname(file).expand_path }
    paths.each { |path| parse(path.read) }
    self
  end

  private

  # @return [RDF::URI] a freshly minted random UUID URN
  def uuid_urn
    RDF::URI(UUIDTools::UUID.random_create.to_uri)
  end

  # Scans a transcript, records every textual message in the repository and
  # links each one to the textual message before it.
  #
  # @param string [String] raw transcript contents
  # @return [Array<Hash>] one hash per transcript entry (including
  #   reactions, which produce no statements of their own)
  def parse(string)
    entries = []

    string.scan(RAW) do |stamp, sender, recipient, body|
      # NOTE(review): the transcript only carries a time of day, so the
      # calendar date of dct:created is whatever DateTime.parse fills in --
      # confirm that is acceptable.
      entry = {
        time:      DateTime.parse(stamp),
        sender:    account_for(sender),
        recipient: recipient,
        id:        uuid_urn
      }
      classify(entry, body)
      entries << entry
    end

    # Single pass with a running pointer to the most recent textual message,
    # rather than rescanning all earlier entries for every message.
    previous = nil
    entries.each do |entry|
      next unless entry[:message]

      emit_message(entry, previous)
      previous = entry
    end

    entries
  end

  # Looks up the account node for a display name, minting and describing it
  # the first time the name is seen.
  def account_for(sender)
    @people[sender] ||= uuid_urn.tap do |person|
      repo << [person, RDF.type, SIOC.UserAccount]
      repo << [person, FOAF.nick, RDF::Literal(sender)]
    end
  end

  # Fills in :message, :reply and :react on the entry according to which
  # form (plain, reaction, reply, reaction removal) the body takes.
  def classify(entry, body)
    match = REP.match(body)
    unless match
      entry[:message] = body
      return
    end

    reaction, reply, removal = match.captures.each_slice(2).to_a
    if reaction.first
      entry[:reply] = reaction.first
      entry[:react] = reaction.last
    elsif reply.first
      entry[:reply]   = reply.first
      entry[:message] = reply.last
    else
      # Reaction removal: the quoted text is the second capture of the pair.
      entry[:reply] = removal.last
    end
  end

  # Writes the statements describing one textual message, including any URLs
  # it mentions and the link to the preceding textual message (if any).
  def emit_message(entry, previous)
    id   = entry[:id]
    text = entry[:message]

    repo << [@channel, SIOC.container_of, id]
    repo << [id, RDF.type, SIOCT.InstantMessage]
    repo << [id, RDF::RDFV.value, RDF::Literal(text, language: :en)]
    repo << [id, DCT.creator, entry[:sender]]
    repo << [id, DCT.created, entry[:time]]

    text.scan(/(https?:\/\/[^[:space:]<>()]+)/i).flatten.each do |url|
      repo << [id, DCT.references, RDF::URI(url)]
    end

    repo << [id, SIOC.previous_by_date, previous[:id]] if previous
  end
end
# Command-line entry point, active only when this file is executed directly.
# Every argument is treated as a transcript path; the resulting graph is
# printed to standard output as Turtle.
if $PROGRAM_NAME == __FILE__
  require 'commander'

  Commander.configure do
    program :name, 'zoom-chat-parser'
    program :version, '0.0.2'
    program :description, 'parse zoom chats lol'

    command :main do |cmd|
      cmd.action do |paths, _options|
        parser = ZoomChatParser.new
        parser.run(paths)
        puts parser.repo.dump(:turtle, prefixes: ZoomChatParser::PREFIXES)
      end
    end

    default_command :main
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment