cbassa/plot_hydrogen.py

## plot_hydrogen.py
#!/usr/bin/env python3
import os
import re
import glob
import tqdm

from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt

def parse_header(header_b):
    # TODO. Support files with the additional fields
    #       - NBITS
    #       - MEAN
    #       - RMS
    # "HEADER\nUTC_START    %s\nFREQ         %lf Hz\nBW           %lf Hz\nLENGTH       %f s\nNCHAN        %d\nNSUB         %d\n"

    header_s = header_b.decode('ASCII').strip('\x00')

    regex = r"^HEADER\nUTC_START    (.*)\nFREQ         (.*) Hz\nBW           (.*) Hz\nLENGTH       (.*) s\nNCHAN        (.*)\nNSUB         (.*)\nEND\n$"
    match = re.fullmatch(regex, header_s, re.MULTILINE)

    utc_start = datetime.strptime(match.group(1), '%Y-%m-%dT%H:%M:%S.%f')

    return {'utc_start': utc_start,
            'freq': float(match.group(2)),
            'bw': float(match.group(3)),
            'length': float(match.group(4)),
            'nchan': int(match.group(5)),
            'nsub': int(match.group(6))}


def read_spectrum(froot):
    # Get the number of files
    fnames = sorted(glob.glob(f"{froot}*.bin"))
    path = os.path.dirname(fnames[0])
    datestr = os.path.basename(fnames[0]).split("_")[0]

    # Read first file
    with open(fnames[0], "rb") as f:
        header = parse_header(f.read(256))

    # Read spectrum
    zs = []
    headers = []
    for i_file in tqdm.tqdm(range(len(fnames))):
        fname = os.path.join(path, f"{datestr}_{i_file:06d}.bin")
        # i_sub = 0
        with open(fname, "rb") as f:
            next_header = f.read(256)
            while(next_header):
                ztmp = np.fromfile(f, dtype=np.float32, count=header["nchan"])
                if ztmp.shape[0] == header["nchan"]:
                    headers.append(parse_header(next_header))
                    zs.append(ztmp)
                next_header = f.read(256)
    return np.transpose(np.vstack(zs)), headers


if __name__ == "__main__":
    froot = "2021-07-27T12:24:29"

    z, headers = read_spectrum(froot)
    cfreq = headers[0]["freq"]
    bw = headers[0]["bw"]
    nchan = headers[0]["nchan"]

    freq = cfreq + bw * np.linspace(-0.5, 0.5, nchan)


    z = z[11000:15000, :]
    freq = freq[11000:15000] * 1e-6 - 1420

    nchan, nsub = z.shape
    nbin = 10
    nsub_binned = nsub // nbin
    z = z[:, :nsub_binned * nbin].reshape(nchan, nsub_binned, nbin).mean(axis=2)
    t = np.arange(nsub // nbin)

    tmin, tmax = np.min(t), np.max(t)
    fmin, fmax = np.min(freq), np.max(freq)

    fig, ax = plt.subplots(figsize=(10, 8))

    vmean, vstd = np.mean(z), np.std(z)
    vmin, vmax = vmean - 2 * vstd, vmean + 3 * vstd

    ax.imshow(z, origin="lower", aspect="auto",
              vmin=vmin, vmax=vmax, extent=[tmin, tmax, fmin, fmax])

    ax.set_xlabel("Subintegration (10s)")
    ax.set_ylabel("Frequency (MHz) - 1420 MHz")
    ax.set_title("Hydrogen line from Dwingeloo Telescope")

    plt.tight_layout()
    plt.savefig("hydrogen.png", bbox_inches="tight")
	#!/usr/bin/env python3
	import os
	import re
	import glob
	import tqdm

	from datetime import datetime

	import numpy as np
	import matplotlib.pyplot as plt

	def parse_header(header_b):
	# TODO. Support files with the additional fields
	# - NBITS
	# - MEAN
	# - RMS
	# "HEADER\nUTC_START %s\nFREQ %lf Hz\nBW %lf Hz\nLENGTH %f s\nNCHAN %d\nNSUB %d\n"

	header_s = header_b.decode('ASCII').strip('\x00')

	regex = r"^HEADER\nUTC_START (.)\nFREQ (.) Hz\nBW (.) Hz\nLENGTH (.) s\nNCHAN (.)\nNSUB (.)\nEND\n$"
	match = re.fullmatch(regex, header_s, re.MULTILINE)

	utc_start = datetime.strptime(match.group(1), '%Y-%m-%dT%H:%M:%S.%f')

	return {'utc_start': utc_start,
	'freq': float(match.group(2)),
	'bw': float(match.group(3)),
	'length': float(match.group(4)),
	'nchan': int(match.group(5)),
	'nsub': int(match.group(6))}


	def read_spectrum(froot):
	# Get the number of files
	fnames = sorted(glob.glob(f"{froot}*.bin"))
	path = os.path.dirname(fnames[0])
	datestr = os.path.basename(fnames[0]).split("_")[0]

	# Read first file
	with open(fnames[0], "rb") as f:
	header = parse_header(f.read(256))

	# Read spectrum
	zs = []
	headers = []
	for i_file in tqdm.tqdm(range(len(fnames))):
	fname = os.path.join(path, f"{datestr}_{i_file:06d}.bin")
	# i_sub = 0
	with open(fname, "rb") as f:
	next_header = f.read(256)
	while(next_header):
	ztmp = np.fromfile(f, dtype=np.float32, count=header["nchan"])
	if ztmp.shape[0] == header["nchan"]:
	headers.append(parse_header(next_header))
	zs.append(ztmp)
	next_header = f.read(256)
	return np.transpose(np.vstack(zs)), headers


	if __name__ == "__main__":
	froot = "2021-07-27T12:24:29"

	z, headers = read_spectrum(froot)
	cfreq = headers[0]["freq"]
	bw = headers[0]["bw"]
	nchan = headers[0]["nchan"]

	freq = cfreq + bw * np.linspace(-0.5, 0.5, nchan)


	z = z[11000:15000, :]
	freq = freq[11000:15000] * 1e-6 - 1420

	nchan, nsub = z.shape
	nbin = 10
	nsub_binned = nsub // nbin
	z = z[:, :nsub_binned * nbin].reshape(nchan, nsub_binned, nbin).mean(axis=2)
	t = np.arange(nsub // nbin)

	tmin, tmax = np.min(t), np.max(t)
	fmin, fmax = np.min(freq), np.max(freq)

	fig, ax = plt.subplots(figsize=(10, 8))

	vmean, vstd = np.mean(z), np.std(z)
	vmin, vmax = vmean - 2 * vstd, vmean + 3 * vstd

	ax.imshow(z, origin="lower", aspect="auto",
	vmin=vmin, vmax=vmax, extent=[tmin, tmax, fmin, fmax])

	ax.set_xlabel("Subintegration (10s)")
	ax.set_ylabel("Frequency (MHz) - 1420 MHz")
	ax.set_title("Hydrogen line from Dwingeloo Telescope")

	plt.tight_layout()
	plt.savefig("hydrogen.png", bbox_inches="tight")