Skip to content

Instantly share code, notes, and snippets.

@wingedpig
Created April 23, 2023 15:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save wingedpig/c1cf3de88cb1c251b92fff399b8d2a8c to your computer and use it in GitHub Desktop.
Save wingedpig/c1cf3de88cb1c251b92fff399b8d2a8c to your computer and use it in GitHub Desktop.
A Go program to parse ChatGPT export files
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"sort"
"strconv"
"strings"
"time"
)
// Time is a timestamp that is represented in JSON as seconds since the Unix
// epoch. It converts to and from time.Time with a plain type conversion.
type Time time.Time

// MarshalJSON encodes the timestamp as an integer number of epoch seconds.
// Sub-second precision is dropped.
func (t Time) MarshalJSON() ([]byte, error) {
	return []byte(strconv.FormatInt(time.Time(t).Unix(), 10)), nil
}

// UnmarshalJSON decodes a JSON number of epoch seconds into t.
//
// Anything after a decimal point is discarded, so "1682263800.123456" decodes
// to the same instant as "1682263800". A JSON null leaves t unchanged, which
// is the encoding/json convention for Unmarshalers; without that guard a
// single null timestamp would abort decoding of the whole document. Any other
// input that is not a base-10 integer (optionally followed by a fraction)
// returns the strconv parse error.
func (t *Time) UnmarshalJSON(s []byte) error {
	raw := strings.TrimSpace(string(s))
	if raw == "null" {
		return nil
	}
	// Keep only the whole-second part of a fractional timestamp.
	if dot := strings.IndexByte(raw, '.'); dot >= 0 {
		raw = raw[:dot]
	}
	secs, err := strconv.ParseInt(raw, 10, 64)
	if err != nil {
		return err
	}
	*t = Time(time.Unix(secs, 0))
	return nil
}

// Unix returns the timestamp as seconds since the Unix epoch.
func (t Time) Unix() int64 {
	return time.Time(t).Unix()
}

// Time returns the timestamp as a time.Time in UTC.
func (t Time) Time() time.Time {
	return time.Time(t).UTC()
}

// String formats the timestamp, in UTC, using time.Time's default layout.
func (t Time) String() string {
	return t.Time().String()
}
// Chunk is one entry of a conversation's "mapping" object: an identifier plus
// the message stored under it.
type Chunk struct {
// ID is the entry's own identifier.
ID string `json:"id"`
// Message is the message carried by this entry. It is decoded by value, so
// fields absent from the JSON keep their zero values.
Message struct {
// ID is the message's identifier.
ID string `json:"id"`
Author struct {
// Role names who wrote the message. main labels "user" as the user and
// every other value as ChatGPT.
Role string `json:"role"`
} `json:"author"`
// Created is the message timestamp; main orders a conversation's chunks by it.
Created Time `json:"create_time"`
Content struct {
// Type is the export's content_type value. Nothing in this program reads it.
Type string `json:"content_type"`
// Parts holds the message text. main writes each non-empty part as its
// own paragraph.
Parts []string `json:"parts"`
} `json:"content"`
} `json:"message"`
}
// Convo is one conversation from the export file, which main decodes as a
// JSON array of these.
type Convo struct {
// Title is the conversation title; main derives the output file name from it.
Title string `json:"title"`
// Created and Updated are written at the top of the generated Markdown file.
Created Time `json:"create_time"`
Updated Time `json:"update_time"`
// Chunks holds the conversation's messages, decoded from the "mapping"
// object. Keys are presumably the chunk IDs (TODO confirm against a real
// export); main ignores the keys and reads only the values.
Chunks map[string]Chunk `json:"mapping"`
}
// main converts a ChatGPT conversations.json export into Markdown, writing one
// "<title>.md" file per conversation into the output directory.
//
// Usage: chatgptparseexport conversations.json outdir
//
// The output directory is not created here, so it must already exist. Any
// failure to read, decode, or write is fatal.
func main() {
	flag.Parse()
	args := flag.Args()
	if len(args) < 2 {
		fmt.Printf("Usage: chatgptparseexport conversations.json outdir\n")
		os.Exit(1)
	}
	in := args[0]
	outdir := strings.TrimSuffix(args[1], "/")

	content, err := ioutil.ReadFile(in)
	if err != nil {
		log.Fatal("Error when opening file: ", err)
	}
	var convos []Convo
	if err := json.Unmarshal(content, &convos); err != nil {
		log.Fatal("Error during Unmarshal(): ", err)
	}
	fmt.Printf("Read %d conversations\n", len(convos))

	for _, convo := range convos {
		// A "/" in the title would be read as a directory separator, so it is
		// replaced. The sanitized name is the one that must be created on
		// disk, not just the one that is printed.
		title := strings.TrimSuffix(convo.Title, ".")
		filename := fmt.Sprintf("%s/%s.md", outdir, strings.ReplaceAll(title, "/", "-"))
		fmt.Printf("Generating %s\n", filename)
		if err := writeMarkdown(filename, renderConvo(convo)); err != nil {
			log.Fatal("Error when writing file: ", err)
		}
	}
}

// chunksByTime returns the conversation's chunks ordered by message creation
// time. Map iteration order is random and timestamps only have one-second
// resolution, so ties are broken by chunk ID to keep the output reproducible
// from run to run.
func chunksByTime(convo Convo) []Chunk {
	chunks := make([]Chunk, 0, len(convo.Chunks))
	for _, chunk := range convo.Chunks {
		chunks = append(chunks, chunk)
	}
	sort.Slice(chunks, func(i, j int) bool {
		ti, tj := chunks[i].Message.Created.Unix(), chunks[j].Message.Created.Unix()
		if ti != tj {
			return ti < tj
		}
		return chunks[i].ID < chunks[j].ID
	})
	return chunks
}

// renderConvo builds the Markdown document for one conversation: a
// Created/Updated header followed by every non-empty message part, each
// prefixed with the speaker's label.
func renderConvo(convo Convo) string {
	var doc strings.Builder
	fmt.Fprintf(&doc, "Created: %s\n", convo.Created.Time().Format("01-02-2006"))
	fmt.Fprintf(&doc, "Updated: %s\n", convo.Updated.Time().Format("01-02-2006"))
	doc.WriteString("\n")
	for _, chunk := range chunksByTime(convo) {
		label := "**ChatGPT:** "
		if chunk.Message.Author.Role == "user" {
			label = "**User:** "
		}
		for _, part := range chunk.Message.Content.Parts {
			if part == "" {
				continue
			}
			doc.WriteString(label)
			doc.WriteString(closeCodeFences(part))
			doc.WriteString("\n\n")
		}
	}
	return doc.String()
}

// closeCodeFences appends a closing ``` when part contains an odd number of
// fences, which happens when ChatGPT is interrupted while writing out code.
// The fence goes on a line of its own, because Markdown does not treat a fence
// glued to the end of a code line as a closing fence.
func closeCodeFences(part string) string {
	if strings.Count(part, "```")%2 == 0 {
		return part
	}
	if !strings.HasSuffix(part, "\n") {
		part += "\n"
	}
	return part + "```"
}

// writeMarkdown creates (or truncates) filename and writes doc to it,
// reporting the first error from create, write, or close.
func writeMarkdown(filename, doc string) error {
	f, err := os.Create(filename)
	if err != nil {
		return err
	}
	if _, err := f.WriteString(doc); err != nil {
		f.Close() // best effort; the write error is the one worth reporting
		return err
	}
	return f.Close()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment