Created
April 20, 2018 14:20
-
-
Save toVersus/7babaed2cc643f1bf472986f35b5616c to your computer and use it in GitHub Desktop.
[Language Processing 100 Essentials] #65: Retrieve artist information from MongoDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"flag" | |
"fmt" | |
"io" | |
"os" | |
"time" | |
mgo "gopkg.in/mgo.v2" | |
"gopkg.in/mgo.v2/bson" | |
) | |
type Artist struct { | |
ID int `json:"id"` | |
GID string `json:"gid"` | |
Name string `json:"name"` | |
SortName string `json:"sort_name"` | |
Area string `json:"area"` | |
Aliases []*Aliase `json:"aliases"` | |
Begin *Begin `json:"begin"` | |
End *End `json:"end"` | |
Tags []*Tag `json:"tags"` | |
Rating *Rating `json:"rating"` | |
} | |
type Artists []*Artist | |
// Aliase is one entry of an Artist's "aliases" array.
//
// The bson tags mirror the json tags so that SortName is stored in MongoDB
// under "sort_name" (the key used by the input JSON) rather than under mgo's
// default lowercased field name "sortname".
type Aliase struct {
	Name     string `json:"name" bson:"name"`
	SortName string `json:"sort_name" bson:"sort_name"`
}
// Begin holds the year, month and date numbers decoded from the "begin"
// object of an artist record.
type Begin struct {
	Year  int `json:"year"`
	Month int `json:"month"`
	Date  int `json:"date"`
}
// End holds the year, month and date numbers decoded from the "end" object
// of an artist record.
type End struct {
	Year  int `json:"year"`
	Month int `json:"month"`
	Date  int `json:"date"`
}
// Tag is one entry of an Artist's "tags" array: a string value together with
// an integer count.
type Tag struct {
	Count int    `json:"count"`
	Value string `json:"value"`
}
// Rating is the "rating" object of an artist record: an integer value
// together with an integer count.
type Rating struct {
	Count int `json:"count"`
	Value int `json:"value"`
}
func main() { | |
var filepath, artistName string | |
flag.StringVar(&filepath, "file", "", "specify a file path") | |
flag.StringVar(&filepath, "f", "", "specify a file path") | |
flag.StringVar(&artistName, "name", "", "specify the artist name") | |
flag.StringVar(&artistName, "n", "", "specify the artist name") | |
flag.Parse() | |
artists, err := readBSON(filepath) | |
if err != nil { | |
fmt.Println(err) | |
os.Exit(1) | |
} | |
session, err := mgo.Dial("mongodb://localhost") | |
if err != nil { | |
fmt.Println(err) | |
os.Exit(1) | |
} | |
c := session.DB("MusicBrainz").C("artist") | |
size := len(artists) | |
for progress, artist := range artists { | |
err := c.Insert(artist) | |
if err != nil { | |
fmt.Println(err) | |
os.Exit(1) | |
} | |
if progress%10000 == 0 { | |
fmt.Printf("%d / %d...completed\n", progress, size) | |
} | |
} | |
query := c.Find(bson.M{"name": artistName}) | |
artist, _, err := getQueryTime(query) | |
fmt.Println(artist) | |
} | |
func readBSON(path string) (Artists, error) { | |
f, err := os.Open(path) | |
if err != nil { | |
return nil, fmt.Errorf("could not open a file: %s\n %s", path, err) | |
} | |
defer f.Close() | |
var artists Artists | |
reader := bufio.NewReader(f) | |
for { | |
artist := Artist{} | |
buf, readErr := reader.ReadBytes('\n') | |
if (readErr != nil) && (readErr != io.EOF) { | |
panic(err) | |
} | |
if err = bson.UnmarshalJSON(buf, &artist); err != nil && readErr != io.EOF { | |
fmt.Print("could not parse json file.") | |
break | |
} | |
artists = append(artists, &artist) | |
if readErr == io.EOF { | |
break | |
} | |
} | |
return artists, nil | |
} | |
func getQueryTime(query *mgo.Query) (*Artist, time.Duration, error) { | |
artist := &Artist{} | |
start := time.Now() | |
if err := query.One(&artist); err != nil { | |
return nil, 0, err | |
} | |
return artist, time.Now().Sub(start), nil | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"os" | |
"testing" | |
"github.com/go-test/deep" | |
mgo "gopkg.in/mgo.v2" | |
"gopkg.in/mgo.v2/bson" | |
) | |
// insertDocsTests is the fixture table consumed by TestGetTags.
var insertDocsTests = []struct {
	// name labels the test case in the test output.
	name string
	// file is the path the fixture text is written to before it is read back.
	file string
	// text is the fixture content: one JSON artist object per line.
	text string
	// want lists the expected artist IDs, in the same order as the lines of text.
	want []int
}{
	{
		name: "should get the tags",
		file: "./fulltext-test.json",
		text: `{"name": "Sam James", "area": "United States", "gender": "Male", "sort_name": "James, Sam", "ended": true, "gid": "183da4be-0cb0-4e6d-ba6d-91e57b7a6780", "type": "Person", "id": 729749, "aliases": [{"name": "Sam James Vende", "sort_name": "Sam James Vende"}]}
{"name": "Norman Kolodziej", "area": "Germany", "gender": "Male", "sort_name": "Kolodziej, Norman", "ended": true, "gid": "5ff386f1-2c4e-4c1c-b5fc-668ec25e1b3e", "type": "Person", "id": 811484}
{"name": "Bass Cube", "sort_name": "Bass Cube", "ended": true, "gid": "f1568f36-152b-40da-aef3-3582636f88be", "type": "Group", "id": 6153}
{"name": "Medras", "sort_name": "Medras", "ended": true, "gid": "a7d007ec-8026-4e84-982d-b6306baa14df", "type": "Person", "id": 723542}
{"name": "Kalev Lindal", "area": "Estonia", "gender": "Male", "sort_name": "Lindal, Kalev", "ended": true, "gid": "8864f9e3-6a03-40a7-9acb-ac8386d404e7", "type": "Person", "id": 892318}
{"name": "Nick Flower", "sort_name": "Flower, Nick", "ended": true, "gid": "537b606c-d7e4-4c8a-8e39-91e22c2bf720", "type": "Person", "id": 725047}
{"name": "Danièle Forget", "sort_name": "Forget, Danièle", "ended": true, "gid": "5d6d5857-3d03-4b1b-a3ee-d789e883c1b2", "type": "Person", "id": 726135}`,
		want: []int{729749, 811484, 6153, 723542, 892318, 725047, 726135},
	},
}
func TestGetTags(t *testing.T) { | |
for _, testcase := range insertDocsTests { | |
t.Log(testcase.name) | |
f, err := os.Create(testcase.file) | |
if err != nil { | |
t.Errorf("could not create a file: %s\n %s\n", testcase.file, err) | |
} | |
f.WriteString(testcase.text) | |
f.Close() | |
artists, err := readBSON(testcase.file) | |
if err != nil { | |
t.Errorf("could not parse a JSON file: %s\n %s\n", testcase.file, err) | |
} | |
session, err := mgo.Dial("mongodb://localhost") | |
if err != nil { | |
t.Error(err) | |
} | |
db := session.DB("Testing") | |
c := db.C("artist") | |
for _, artist := range artists { | |
err := c.Insert(artist) | |
if err != nil { | |
t.Error(err) | |
} | |
} | |
t.Log("") | |
t.Log("before indexing") | |
for _, artist := range artists { | |
query := c.Find(bson.M{"name": artist.Name}) | |
a, count, err := getQueryTime(query) | |
if err != nil { | |
t.Error(err) | |
} | |
t.Logf("%s found...%d\n", a.Name, count) | |
} | |
t.Log("") | |
t.Log("after indexing") | |
results := []int{} | |
for _, artist := range artists { | |
keys := []string{"name", "aliases.name", "tags.value", "rating.value"} | |
for _, key := range keys { | |
err = c.EnsureIndexKey(key) | |
if err != nil { | |
t.Error(err) | |
} | |
} | |
query := c.Find(bson.M{"name": artist.Name}) | |
a, count, err := getQueryTime(query) | |
if err != nil { | |
t.Error(err) | |
} | |
t.Logf("%s found...%d\n", a.Name, count) | |
results = append(results, a.ID) | |
} | |
if diff := deep.Equal(results, testcase.want); diff != nil { | |
t.Error(diff) | |
} | |
if err = db.DropDatabase(); err != nil { | |
t.Errorf("could not delete database\n %s\n", err) | |
} | |
if err = os.Remove(testcase.file); err != nil { | |
t.Errorf("could not delete a file: %s\n %s\n", testcase.file, err) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment