Skip to content

Instantly share code, notes, and snippets.

@toVersus
Created April 20, 2018 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save toVersus/7babaed2cc643f1bf472986f35b5616c to your computer and use it in GitHub Desktop.
Save toVersus/7babaed2cc643f1bf472986f35b5616c to your computer and use it in GitHub Desktop.
[Language Processing 100 Essentials] #65: Retrieve artist information from MongoDB
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"time"
mgo "gopkg.in/mgo.v2"
"gopkg.in/mgo.v2/bson"
)
// Artist is one artist record. It is decoded from a line of JSON by readBSON
// and stored as a MongoDB document by main.
//
// Both json and bson tags are given explicitly. Without a bson tag, mgo's bson
// encoder uses the lowercased Go field name as the document key, which would
// store SortName as "sortname" rather than the "sort_name" key used by the
// JSON input.
type Artist struct {
	ID       int       `json:"id" bson:"id"`
	GID      string    `json:"gid" bson:"gid"`
	Name     string    `json:"name" bson:"name"`
	SortName string    `json:"sort_name" bson:"sort_name"`
	Area     string    `json:"area" bson:"area"`
	Aliases  []*Aliase `json:"aliases" bson:"aliases"`
	Begin    *Begin    `json:"begin" bson:"begin"`
	End      *End      `json:"end" bson:"end"`
	Tags     []*Tag    `json:"tags" bson:"tags"`
	Rating   *Rating   `json:"rating" bson:"rating"`
}
// Artists is a list of pointers to Artist records; it is the result type of
// readBSON.
type Artists []*Artist
// Aliase is one entry of an Artist's "aliases" list.
//
// The bson tags mirror the json tags. Without them, mgo's bson encoder would
// store SortName under the lowercased field name "sortname" instead of
// "sort_name".
type Aliase struct {
	Name     string `json:"name" bson:"name"`
	SortName string `json:"sort_name" bson:"sort_name"`
}
// Begin is the year/month/date triple decoded from an Artist's "begin" field.
// NOTE(review): presumably the date the artist started; confirm against the
// data source.
type Begin struct {
Year int `json:"year"`
Month int `json:"month"`
Date int `json:"date"`
}
// End is the year/month/date triple decoded from an Artist's "end" field.
// NOTE(review): presumably the date the artist stopped; confirm against the
// data source.
type End struct {
Year int `json:"year"`
Month int `json:"month"`
Date int `json:"date"`
}
// Tag is one entry of an Artist's "tags" list: a string value and an integer
// count.
type Tag struct {
Count int `json:"count"`
Value string `json:"value"`
}
// Rating is the object decoded from an Artist's "rating" field: an integer
// value and an integer count.
type Rating struct {
Count int `json:"count"`
Value int `json:"value"`
}
// main parses the command-line flags and delegates to run. Any error is
// printed to standard error and the process exits with status 1.
//
// Flags:
//
//	-file, -f  path of the line-delimited JSON file to load
//	-name, -n  artist name to look up after loading
func main() {
	var filepath, artistName string
	flag.StringVar(&filepath, "file", "", "specify a file path")
	flag.StringVar(&filepath, "f", "", "specify a file path")
	flag.StringVar(&artistName, "name", "", "specify the artist name")
	flag.StringVar(&artistName, "n", "", "specify the artist name")
	flag.Parse()

	// The work lives in run so that its deferred session.Close executes
	// before os.Exit, which would otherwise skip deferred calls.
	if err := run(filepath, artistName); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run reads the artists from path, inserts each of them into the "artist"
// collection of the "MusicBrainz" database on the local MongoDB server, then
// looks up one artist whose name equals artistName and prints it.
// It returns the first error encountered.
func run(path, artistName string) error {
	artists, err := readBSON(path)
	if err != nil {
		return err
	}

	session, err := mgo.Dial("mongodb://localhost")
	if err != nil {
		return err
	}
	defer session.Close()

	c := session.DB("MusicBrainz").C("artist")
	size := len(artists)
	for progress, artist := range artists {
		if err := c.Insert(artist); err != nil {
			return err
		}
		// Report progress once every 10000 inserted documents.
		if progress%10000 == 0 {
			fmt.Printf("%d / %d...completed\n", progress, size)
		}
	}

	artist, _, err := getQueryTime(c.Find(bson.M{"name": artistName}))
	if err != nil {
		return err
	}
	fmt.Println(artist)
	return nil
}
// readBSON reads the file at path, which must hold one JSON document per
// line, and decodes every non-blank line into an Artist.
//
// Blank lines (including the empty remainder after a trailing newline) are
// skipped, so they no longer produce zero-valued Artist entries. A read or
// parse failure is returned as an error together with a nil slice instead of
// panicking or silently returning partial data.
func readBSON(path string) (Artists, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("could not open a file: %s\n %s", path, err)
	}
	defer f.Close()

	var artists Artists
	reader := bufio.NewReader(f)
	for lineNo := 1; ; lineNo++ {
		// ReadBytes may return data together with io.EOF when the last line
		// has no trailing newline, so the buffer is processed before the EOF
		// check below.
		buf, readErr := reader.ReadBytes('\n')
		if readErr != nil && readErr != io.EOF {
			return nil, fmt.Errorf("could not read a file: %s\n %s", path, readErr)
		}

		if !isBlankLine(buf) {
			artist := &Artist{}
			if err := bson.UnmarshalJSON(buf, artist); err != nil {
				return nil, fmt.Errorf("could not parse json file: %s (line %d)\n %s", path, lineNo, err)
			}
			artists = append(artists, artist)
		}

		if readErr == io.EOF {
			return artists, nil
		}
	}
}

// isBlankLine reports whether line consists only of spaces, tabs, carriage
// returns and newlines (or is empty).
func isBlankLine(line []byte) bool {
	for _, b := range line {
		switch b {
		case ' ', '\t', '\r', '\n':
		default:
			return false
		}
	}
	return true
}
// getQueryTime executes query, decoding a single matching document into an
// Artist, and measures how long the query.One call took.
//
// It returns the decoded artist and the elapsed time. If query.One fails, it
// returns a nil artist, a zero duration and that error.
func getQueryTime(query *mgo.Query) (*Artist, time.Duration, error) {
	var found Artist
	began := time.Now()
	err := query.One(&found)
	if err != nil {
		return nil, 0, err
	}
	elapsed := time.Since(began)
	return &found, elapsed, nil
}
package main
import (
"os"
"testing"
"github.com/go-test/deep"
mgo "gopkg.in/mgo.v2"
"gopkg.in/mgo.v2/bson"
)
// insertDocsTests is the table of cases driven by TestGetTags.
//
// name: label logged for the case.
// file: path of the temporary fixture file the test creates and removes.
// text: content written to file, one JSON artist document per line.
// want: artist IDs expected from the lookups, in the same order as the
// documents appear in text.
var insertDocsTests = []struct {
name string
file string
text string
want []int
}{
{
name: "should get the tags",
file: "./fulltext-test.json",
text: `{"name": "Sam James", "area": "United States", "gender": "Male", "sort_name": "James, Sam", "ended": true, "gid": "183da4be-0cb0-4e6d-ba6d-91e57b7a6780", "type": "Person", "id": 729749, "aliases": [{"name": "Sam James Vende", "sort_name": "Sam James Vende"}]}
{"name": "Norman Kolodziej", "area": "Germany", "gender": "Male", "sort_name": "Kolodziej, Norman", "ended": true, "gid": "5ff386f1-2c4e-4c1c-b5fc-668ec25e1b3e", "type": "Person", "id": 811484}
{"name": "Bass Cube", "sort_name": "Bass Cube", "ended": true, "gid": "f1568f36-152b-40da-aef3-3582636f88be", "type": "Group", "id": 6153}
{"name": "Medras", "sort_name": "Medras", "ended": true, "gid": "a7d007ec-8026-4e84-982d-b6306baa14df", "type": "Person", "id": 723542}
{"name": "Kalev Lindal", "area": "Estonia", "gender": "Male", "sort_name": "Lindal, Kalev", "ended": true, "gid": "8864f9e3-6a03-40a7-9acb-ac8386d404e7", "type": "Person", "id": 892318}
{"name": "Nick Flower", "sort_name": "Flower, Nick", "ended": true, "gid": "537b606c-d7e4-4c8a-8e39-91e22c2bf720", "type": "Person", "id": 725047}
{"name": "Danièle Forget", "sort_name": "Forget, Danièle", "ended": true, "gid": "5d6d5857-3d03-4b1b-a3ee-d789e883c1b2", "type": "Person", "id": 726135}`,
want: []int{729749, 811484, 6153, 723542, 892318, 725047, 726135},
},
}
// TestGetTags is an integration test that needs a MongoDB server reachable at
// mongodb://localhost. For every case in insertDocsTests it:
//
//  1. writes the case's text to a fixture file and loads it with readBSON,
//  2. inserts the artists into the "artist" collection of the "Testing" database,
//  3. looks every artist up by name and logs the query time before indexing,
//  4. creates the indexes, repeats the lookups and compares the returned IDs
//     with the expected ones.
//
// Setup failures abort the case with Fatal so that a nil file, session or
// artist is never dereferenced. Cleanup (dropping the database, closing the
// session, removing the fixture) is deferred so it also runs when a case
// fails early.
func TestGetTags(t *testing.T) {
	for _, testcase := range insertDocsTests {
		// Each case runs as a subtest so its deferred cleanup fires at the
		// end of the case rather than at the end of the whole test.
		t.Run(testcase.name, func(t *testing.T) {
			f, err := os.Create(testcase.file)
			if err != nil {
				t.Fatalf("could not create a file: %s\n %s\n", testcase.file, err)
			}
			defer func() {
				if err := os.Remove(testcase.file); err != nil {
					t.Errorf("could not delete a file: %s\n %s\n", testcase.file, err)
				}
			}()

			_, writeErr := f.WriteString(testcase.text)
			closeErr := f.Close()
			if writeErr != nil {
				t.Fatalf("could not write a file: %s\n %s\n", testcase.file, writeErr)
			}
			if closeErr != nil {
				t.Fatalf("could not close a file: %s\n %s\n", testcase.file, closeErr)
			}

			artists, err := readBSON(testcase.file)
			if err != nil {
				t.Fatalf("could not parse a JSON file: %s\n %s\n", testcase.file, err)
			}

			session, err := mgo.Dial("mongodb://localhost")
			if err != nil {
				t.Fatal(err)
			}
			defer session.Close()

			db := session.DB("Testing")
			// Registered after session.Close, so it runs before the session
			// is closed (deferred calls run in reverse order).
			defer func() {
				if err := db.DropDatabase(); err != nil {
					t.Errorf("could not delete database\n %s\n", err)
				}
			}()

			c := db.C("artist")
			for _, artist := range artists {
				if err := c.Insert(artist); err != nil {
					t.Fatal(err)
				}
			}

			t.Log("")
			t.Log("before indexing")
			for _, artist := range artists {
				a, elapsed, err := getQueryTime(c.Find(bson.M{"name": artist.Name}))
				if err != nil {
					t.Error(err)
					continue
				}
				t.Logf("%s found...%d\n", a.Name, elapsed)
			}

			// The indexes do not depend on the artist being queried, so they
			// are created once before the second round of lookups.
			for _, key := range []string{"name", "aliases.name", "tags.value", "rating.value"} {
				if err := c.EnsureIndexKey(key); err != nil {
					t.Error(err)
				}
			}

			t.Log("")
			t.Log("after indexing")
			results := make([]int, 0, len(artists))
			for _, artist := range artists {
				a, elapsed, err := getQueryTime(c.Find(bson.M{"name": artist.Name}))
				if err != nil {
					t.Error(err)
					continue
				}
				t.Logf("%s found...%d\n", a.Name, elapsed)
				results = append(results, a.ID)
			}

			if diff := deep.Equal(results, testcase.want); diff != nil {
				t.Error(diff)
			}
		})
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment