Created
October 24, 2016 05:05
-
-
Save jessedearing/22b313347747423b4bbf61b7ed73e0a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"bufio"
	"database/sql"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sync"

	_ "github.com/go-sql-driver/mysql"
)
func main() { | |
// Connect to the database | |
db, err := sql.Open("mysql", os.Getenv("MYSQL_USER")+":"+os.Getenv("MYSQL_PWD")+"@tcp(127.0.0.1:3306)/jobs") | |
if err != nil { | |
log.Panic(err) | |
} | |
defer db.Close() | |
// Find all the files we're going to read | |
files, err := filepath.Glob("./**/*.txt") | |
if err != nil { | |
log.Panic(err) | |
} | |
// Drop the table so we can recreate it | |
_, err = db.Exec("drop table if exists words") | |
if err != nil { | |
log.Panic(err) | |
} | |
// Recreate the table | |
_, err = db.Exec("create table words (id bigint unsigned auto_increment primary key, position int, file varchar(80), word varchar(255), key ix_Word (word))") | |
if err != nil { | |
log.Panic(err) | |
} | |
// Use a waitgroup so we can run loading every file in it's own goroutine | |
var wg *sync.WaitGroup | |
wg = new(sync.WaitGroup) | |
for _, file := range files { | |
wg.Add(1) | |
go processFile(file, db, wg) | |
} | |
wg.Wait() | |
} | |
// wordPattern matches the first run of word characters (letters, digits and
// underscore) inside a whitespace-delimited token. It is compiled once at
// package scope instead of once per token.
var wordPattern = regexp.MustCompile(`\w+`)

// processFile reads the file at path file token by token and inserts one row
// per word into the words table through db.
//
// Each whitespace-delimited token is reduced to its first run of word
// characters; tokens with no word characters are skipped and do not consume a
// position. Rows record the zero-based position of the word within the file,
// the file's base name, and the word itself.
//
// wg.Done is always called on return. Failing to open the file or to insert a
// row panics via log.Panic; a read error other than end-of-file is logged and
// ends processing of this file.
func processFile(file string, db *sql.DB, wg *sync.WaitGroup) {
	defer wg.Done()

	fileh, err := os.Open(file)
	if err != nil {
		log.Panic(err)
	}
	defer fileh.Close()

	// fmt.Fscan needs ReadRune/UnreadRune to scan efficiently and without
	// dropping look-ahead characters; the fmt docs recommend bufio.NewReader
	// for readers such as *os.File that lack them.
	reader := bufio.NewReader(fileh)
	filename := filepath.Base(file)

	var rawString string
	position := 0
	for {
		// Fscan reads the next run of non-whitespace characters.
		if _, err := fmt.Fscan(reader, &rawString); err != nil {
			// io.EOF is the normal end of input; anything else is a real
			// read failure that should not vanish silently.
			if err != io.EOF {
				log.Printf("reading %s: %v", file, err)
			}
			break
		}

		// Strip surrounding punctuation; skip tokens that were all junk.
		word := wordPattern.FindString(rawString)
		if word == "" {
			continue
		}

		if _, err := db.Exec("insert into words (position, file, word) values (?,?,?)", position, filename, word); err != nil {
			log.Panic(err)
		}

		// Only stored words advance the position counter.
		position++
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment