Last active
October 28, 2021 06:22
-
-
Save imjasonh/786085274ca135b94f07 to your computer and use it in GitHub Desktop.
Go script to parse NOAA data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// See ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt for the input record format.
package main | |
import ( | |
"archive/tar" | |
"compress/gzip" | |
"errors" | |
"flag" | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"strconv" | |
"strings" | |
"time" | |
) | |
// tgn points at the value of the -in flag, the input file path. It defaults
// to the empty string.
var tgn = flag.String("in", "", "input file")

// outn points at the value of the -out flag, the output file path. It
// defaults to "out.csv.gz".
var outn = flag.String("out", "out.csv.gz", "output file")
func main() { | |
flag.Parse() | |
tgf, err := os.Open(*tgn) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer tgf.Close() | |
gzr, err := gzip.NewReader(tgf) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer gzr.Close() | |
t := tar.NewReader(gzr) | |
outf, err := os.Create(*outn) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer outf.Close() | |
w := gzip.NewWriter(outf) | |
for { | |
if _, err := t.Next(); err == io.EOF { | |
break | |
} else if err != nil { | |
log.Fatal(err) | |
} | |
d := &decoder{r: t, w: w} | |
for { | |
if err := d.decode(); err == io.EOF { | |
break | |
} else if err != nil { | |
log.Fatal(err) | |
} | |
} | |
} | |
} | |
// decoder converts fixed-width records read from r into CSV rows written to w.
//
// The first error encountered is stored in err. Once err is set, read,
// readInt and write become no-ops, so callers can issue a run of calls and
// check the error once afterwards.
type decoder struct {
	r   io.Reader // source of fixed-width records
	w   io.Writer // destination for CSV rows
	err error     // first error encountered; sticky
}

// decode consumes one record from d.r and writes 31 CSV rows to d.w, one per
// day slot in the record. A record is an 11-byte station id, a 4-digit year,
// a 2-digit month, a 4-byte element code, 31 groups of (5-byte value plus
// three 1-byte flags), and a terminating newline. Each row has the form
//
//	unix_seconds,station,element,value,flag,flag,flag
//
// It returns io.EOF when the input is exhausted at a record boundary and
// io.ErrUnexpectedEOF when the input ends part-way through a record. A final
// record that is complete but lacks the terminating newline is accepted.
// Once decode has returned a non-nil error, later calls return it again.
//
// NOTE(review): 31 rows are emitted regardless of the month's length; for
// shorter months time.Date normalizes the extra days into the following
// month. Confirm downstream consumers expect that.
func (d *decoder) decode() error {
	st := d.read(11)
	if d.err != nil {
		// Either a clean end of input (io.EOF, nothing consumed), a partial
		// station id (io.ErrUnexpectedEOF), or an earlier sticky error.
		return d.err
	}
	y := d.readInt(4)
	m := time.Month(d.readInt(2))
	e := d.read(4)
	if d.err != nil {
		return d.truncated()
	}

	for day := 1; day <= 31; day++ {
		t := time.Date(y, m, day, 0, 0, 0, 0, time.UTC)
		d.write(strconv.FormatInt(t.Unix(), 10))
		d.write(",")
		d.write(st)
		d.write(",")
		d.write(e)
		d.write(",")
		d.write(d.read(5)) // value
		d.write(",")
		d.write(d.read(1)) // first flag
		d.write(",")
		d.write(d.read(1)) // second flag
		d.write(",")
		d.write(d.read(1)) // third flag
		d.write("\n")
	}
	if d.err != nil {
		return d.truncated()
	}

	eol := d.read(1)
	switch {
	case errors.Is(d.err, io.EOF):
		// The last record has no trailing newline. The record itself is
		// complete, so report success; d.err stays set and the next call
		// returns io.EOF.
		return nil
	case d.err != nil:
		return d.err
	case eol != "\n":
		return errors.New("not end of line")
	}
	return nil
}

// truncated returns the stored error, first replacing io.EOF with
// io.ErrUnexpectedEOF. It is used for failures after a record has started, so
// callers cannot mistake a cut-off record for a clean end of input.
func (d *decoder) truncated() error {
	if d.err == io.EOF {
		d.err = io.ErrUnexpectedEOF
	}
	return d.err
}

// read consumes exactly l bytes from d.r and returns them with leading and
// trailing spaces removed. A field consisting only of spaces is returned as
// the two-character string `""` so that it appears as an empty quoted CSV
// field. If an error is already stored, or the read fails, read returns the
// empty string; a read failure is stored in d.err.
func (d *decoder) read(l int) string {
	if d.err != nil {
		return ""
	}
	b := make([]byte, l)
	// io.ReadFull returns a nil error only when all l bytes were read, so no
	// separate length check is needed.
	if _, err := io.ReadFull(d.r, b); err != nil {
		d.err = err
		return ""
	}
	if s := strings.Trim(string(b), " "); s != "" {
		return s
	}
	return `""`
}

// readInt consumes an l-byte field from d.r and parses it as a decimal
// integer. It returns 0 if an error is already stored, if the read fails, or
// if the field is not a valid integer; a parse failure is stored in d.err.
func (d *decoder) readInt(l int) int {
	s := d.read(l)
	if d.err != nil {
		// Do not attempt to parse the empty result of a failed read; that
		// would replace the I/O error with a misleading parse error.
		return 0
	}
	i, err := strconv.Atoi(s)
	if err != nil {
		d.err = fmt.Errorf("reading %d-byte integer field: %w", l, err)
		return 0
	}
	return i
}

// write sends s to d.w unless an error is already stored; a write failure is
// stored in d.err.
func (d *decoder) write(s string) {
	if d.err != nil {
		return
	}
	if _, err := io.WriteString(d.w, s); err != nil {
		d.err = err
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[
{"name":"date", "type":"timestamp", "mode":"required"}, | |
{"name":"station_id", "type":"string", "mode":"required"}, | |
{"name":"element", "type":"string", "mode":"required"}, | |
{"name":"value", "type":"integer", "mode":"required"}, | |
{"name":"mflag", "type":"string", "mode":"nullable"}, | |
{"name":"qflag", "type":"string", "mode":"nullable"}, | |
{"name":"sflag", "type":"string", "mode":"nullable"} | |
] | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment