Skip to content

Instantly share code, notes, and snippets.

@bobvanluijt
Created May 23, 2018 14:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bobvanluijt/5df06486a1d73938b91312305ef84764 to your computer and use it in GitHub Desktop.
Save bobvanluijt/5df06486a1d73938b91312305ef84764 to your computer and use it in GitHub Desktop.
Creates CBOR from GloVe vector file
package main
import (
"bufio"
"bytes"
"compress/gzip"
"fmt"
"io/ioutil"
"log"
"os"
"strconv"
"strings"
cbor "github.com/2tvenom/cbor"
)
// Vectors maps a word (the first token of an input line) to the float64
// components parsed from the remaining tokens of that line.
type Vectors map[string][]float64
// MapOfSimilarity maps a float64 key to a string value. It is not referenced
// by the visible code; presumably similarity score -> word — TODO confirm.
type MapOfSimilarity map[float64]string
// LoadVectors reads a GloVe-style text file and returns its contents as a map
// from each word to its vector.
//
// Each line is expected to hold a word followed by its whitespace-separated
// components. Every component after the word is kept, including the last one.
// Lines that do not contain at least a word and one component are skipped.
// If a word appears on more than one line, the last occurrence wins.
//
// The function does not return an error: a file that cannot be opened or
// read, or a component that is not a valid float, terminates the program
// through log.Fatal / log.Fatalf.
func LoadVectors(inputFile string) Vectors {
	vectors := Vectors{}

	file, err := os.Open(inputFile)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	// Raise the per-line limit above bufio.Scanner's 64 KiB default so that
	// very wide vectors do not abort the scan.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	for lineNo := 1; scanner.Scan(); lineNo++ {
		// Fields tolerates repeated or trailing whitespace, which a plain
		// split on " " would turn into empty, unparsable tokens.
		fields := strings.Fields(scanner.Text())
		if len(fields) < 2 {
			continue
		}
		word, components := fields[0], fields[1:]

		vector := make([]float64, 0, len(components))
		for _, component := range components {
			value, err := strconv.ParseFloat(component, 64)
			if err != nil {
				log.Fatalf("%s:%d: parsing component %q of word %q: %v",
					inputFile, lineNo, component, word, err)
			}
			vector = append(vector, value)
		}
		vectors[word] = vector
	}

	// Err only reports a failure once Scan has returned false, so it has to
	// be checked after the loop rather than before it.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	return vectors
}
func main() {
// Load vectors
vectors := Vectors{}
vectors = LoadVectors("glove.6B.300d.txt")
// Create encoder and marshal
var buffTest bytes.Buffer
encoder := cbor.NewEncoder(&buffTest)
ok, error := encoder.Marshal(vectors)
//check binary string
if !ok {
fmt.Errorf("Error decoding %s", error)
}
fmt.Printf("Encoding to CBOR = done")
// Create GZIPed version
var b bytes.Buffer
w := gzip.NewWriter(&b)
w.Write(buffTest.Bytes())
w.Close()
// Create output file
fmt.Println("Creating output file", "vectors.cbor.gz")
err := ioutil.WriteFile("vectors.cbor.gz", b.Bytes(), 0644)
if err != nil {
panic(err)
}
fmt.Println("DONE")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment