DataMining Project
from matplotlib import pyplot as plt
import numpy as np
import matplotlib
from astropy.io import fits
from astropy.convolution import Gaussian2DKernel
from scipy import ndimage, stats, interpolate, signal
import random

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.size'] = 18
plt.rcParams['figure.figsize'] = (12, 8)


def power_fit(S, k, gamma):
    """
    Source amplitude distributed as a power law.

    Input:
    ------
    S: float
        flux density, or counts
    k: float
        scaling factor
    gamma: float
        spectral index
    """
    return k * S ** gamma
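
# A minimal usage sketch (not part of the original pipeline): recover the
# power-law parameters from synthetic, noise-free counts using
# scipy.optimize.curve_fit; S_demo and the parameter values are
# hypothetical placeholders.
from scipy.optimize import curve_fit
S_demo = np.logspace(0., 2., 20)  # assumed flux grid, mJy
popt_demo, pcov_demo = curve_fit(power_fit, S_demo,
                                 power_fit(S_demo, 1.e4, -2.5),
                                 p0=[1.e3, -2.])
# popt_demo ~ [1.e4, -2.5]
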
def read_data_fromtxt(filename):
    """
    Return:
        np.array format
    """
    import astropy.io.ascii
    data = astropy.io.ascii.read(filename, comment='^#')
    if len(data) == 0:
        errstr = "No data read from %s" % filename
        raise IOError(errstr)
    return np.asarray(data)

def getColn(dataCounts, n):
    """
    Return the nth column of the data.

    Input:
        dataCounts: array
        n: int
    Returns:
        data: array
            nth column of dataCounts
    """
    if not isinstance(n, int):
        raise TypeError('n MUST be an integer!')
    data = []
    for i in np.arange(len(dataCounts)):
        data.append(dataCounts[i][n])
    return np.array(data)
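
# A vectorized alternative (a sketch; assumes dataCounts is the structured
# numpy array returned by read_data_fromtxt, so a column can be pulled out
# by field name instead of looping over rows):
def getColn_vec(dataCounts, n):
    """Hypothetical helper: same output as getColn without the loop."""
    return np.asarray(dataCounts[dataCounts.dtype.names[n]])
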
def conf(beam, para_gamma=2.5, q=5.):
    """
    Confusion noise due to faint-source fluctuations (see lecture slides).
    q is an arbitrary parameter; 5 is the typical value.
    gamma = 2.5 is the Euclidean value (Condon's best-fit value is ~2.1).

    Input:
    ------
    q: float
        default = 5
    para_gamma: float
        default = 2.5 (Euclidean)
    beam: float
        beam solid angle in sr
    Returns:
    --------
    sigma: float
        confusion sigma
    """
    para_k = 10. ** 4  # depends on dnds; ~4 from Gal project
    sigma = (q ** (3. - para_gamma) / (3 - para_gamma)) \
        ** (1. / (para_gamma - 1)) \
        * (beam * para_k) ** (1. / (para_gamma - 1))
    return sigma

def Eff_beam(beamSolidAngle, gamma=2.5):
    """
    Calculate the effective beam area of the instrument based on Condon 1974;
    depends on gamma. The functional form follows Condon's P(D) approach,
    which computes confusion due to weak sources (fluctuations); gamma is
    implicit here and relates to the number counts dN/dS.

    Input:
    ------
    gamma: float
        default = 2.5 (Euclidean); Condon's best-fit value is ~2.1
    Returns:
    --------
    eff_arcsec2: float
        arcsec^2
    """
    eff_arcsec2 = beamSolidAngle / (gamma - 1)
    return eff_arcsec2

def PSF_beam(major, minor):
    """
    Return the Gaussian beam solid angle in arcsec^2.

    Input:
    ------
    major: float
        arcsec, FWHM
    minor: float
        arcsec, FWHM
    """
    PSF = (np.pi / 4.) * major * minor / np.log(2)
    return PSF
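
# Hedged sanity check for the beam helpers above (illustrative numbers only,
# matching the 5 arcsec FWHM beam adopted below): the Gaussian beam solid
# angle is pi/(4 ln 2) * FWHM_maj * FWHM_min ~ 28.3 arcsec^2, and Condon's
# effective beam for gamma = 2.5 divides that by (gamma - 1).
demo_beam_arcsec2 = PSF_beam(5.0, 5.0)          # ~28.3 arcsec^2
demo_eff_arcsec2 = Eff_beam(demo_beam_arcsec2)  # ~18.9 arcsec^2
demo_sigma_c = conf(demo_eff_arcsec2 / 206265. ** 2)  # confusion sigma, Jy
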
def posErr(sigma, FWHM, threshold):
    """
    Calculate the position accuracy based on the Condon memo:
        sigma_p = sigma_tot * FWHM / (2 * threshold)
    """
    pos_err = sigma * FWHM / (2. * threshold)
    return pos_err
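
# Quick check of the Condon position-error scaling (illustrative): a source
# detected right at the 5-sigma threshold has sigma_p = FWHM / 10,
# i.e. 0.5 arcsec for a 5 arcsec beam.
assert abs(posErr(1.0, 5.0, 5.0) - 0.5) < 1e-12
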
def Ns(snu_ax, dnds):
    """
    Make the distribution of N as a function of S.
    """
    Delta_S = []
    s_last = 0
    for i, s in enumerate(snu_ax):
        Delta_S.append(s - s_last)
        s_last = s
    Delta_S = np.array(Delta_S)
    N = np.zeros(np.shape(dnds))
    for i in range(len(N)):
        N[i] = dnds[i] * Delta_S[i]
    return N
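
# A vectorized equivalent of Ns (a sketch; assumes snu_ax is 1-D and the
# bins start from S = 0, exactly as in the loop above):
def Ns_vec(snu_ax, dnds):
    """Hypothetical helper: dN = (dN/dS) * Delta_S without the loops."""
    Delta_S = np.diff(np.concatenate(([0.], np.asarray(snu_ax))))
    return np.asarray(dnds) * Delta_S
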
class initializeMCMC():

    def __init__(self):
        # NOTE: relies on the module-level sigma, Imsize and beamGauss
        # defined further down in this script.
        self.lowlim = np.array([3 * sigma, 0, 0, 0.1])
        self.uplim = np.array([700, Imsize, Imsize, beamGauss])

    def generate_initial_values(self, ndim, nwalkers, initvals, initsigma):
        """ Generate a set of nwalkers initial parameter vectors.

        Parameters
        ----------
        initvals : ndarray
            Initial parameter values in order A, x, y, R.
        initsigma : ndarray
            Sigma values for each parameter.

        Returns
        -------
        p0 : ndarray
            A nwalkers x ndim array of initial values.

        Notes
        -----
        This is only interesting because it has to obey parameter limits.
        As a consequence, the user-provided initial values may be ignored.
        This will take care of fixed parameters correctly.
        """
        if len(initvals) != ndim:
            raise ValueError("Initial values not expected length")
        if len(initsigma) != ndim:
            raise ValueError("Initial sigma values not expected length")
        # First -- see if any of the initial values lie outside
        # the upper/lower limits.
        outside_limits = [False] * ndim
        for i, val in enumerate(initvals):
            # Everything has a lower limit...
            if val < self.lowlim[i]:
                outside_limits[i] = True
            elif val > self.uplim[i]:
                outside_limits[i] = True
        # Adjust initial values if necessary.  We try to keep them
        # close to the user-provided value, just shifting into the
        # range by a few sigma.  If the range is too small, just
        # stick it in the middle.
        int_init = np.zeros(ndim)
        for i in range(ndim):
            if outside_limits[i]:
                par_range = self.uplim[i] - self.lowlim[i]
                if par_range <= 0:
                    errmsg = "Limits on parameter {:d} cross"
                    raise ValueError(errmsg.format(i))
                if 2.0 * initsigma[i] >= par_range:
                    int_init[i] = self.lowlim[i] + 0.5 * par_range
                else:
                    # Figure out if we are below or above
                    if initvals[i] < self.lowlim[i]:
                        int_init[i] = self.lowlim[i] + 2 * initsigma[i]
                    else:
                        int_init[i] = self.uplim[i] - 2 * initsigma[i]
            else:
                int_init[i] = initvals[i]
        # Make p0 (output)
        p0 = np.zeros((nwalkers, ndim))
        maxiters = 100
        for i in range(ndim):
            lwlim = self.lowlim[i]
            hs_uplim = 1  # all parameters here have an upper limit
            uplim = self.uplim[i]
            pvec = initsigma[i] * np.random.randn(nwalkers) + int_init[i]
            # Now reject and regenerate anything outside limits
            if hs_uplim:
                badidx = np.logical_or(pvec > uplim,
                                       pvec < lwlim).nonzero()[0]
            else:
                badidx = np.nonzero(pvec < lwlim)[0]
            iters = 0
            nbad = len(badidx)
            while nbad > 0:
                pvec[badidx] = initsigma[i] * np.random.randn(nbad) + \
                    int_init[i]
                iters += 1
                if hs_uplim:
                    badidx = np.logical_or(pvec > uplim,
                                           pvec < lwlim).nonzero()[0]
                else:
                    badidx = np.nonzero(pvec < lwlim)[0]
                nbad = len(badidx)
                if iters > maxiters:
                    errmsg = "Too many iterations initializing param {:d}"
                    raise Exception(errmsg.format(i))
            p0[:, i] = pvec
        return p0
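
# Hedged usage sketch (shown as a comment only, because initializeMCMC
# reads the module-level sigma, Imsize and beamGauss defined further down;
# the numbers are hypothetical):
# pre = initializeMCMC()
# p0_demo = pre.generate_initial_values(
#     ndim=4, nwalkers=500,
#     initvals=np.array([300., 64., 64., 2.]),
#     initsigma=np.array([20., 2., 2., 1.]))
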
def chain_iter(sam, nwalkers, nburn):
    """Plot each parameter's walker traces as a function of iteration."""
    par = ('A', 'x', 'y', 'R')
    plt.figure(figsize=[20, 10])
    for i, p in enumerate(par):
        ncols = (len(par) / 2 + 1) if len(par) % 2 != 0 else len(par) / 2
        plt.subplot(len(par) / 2, ncols, i + 1)
        for w in range(nwalkers):
            plt.plot(np.arange(sam.chain.shape[1]), sam.chain[w, :, i],
                     'b-', alpha=0.1)
        plt.ylabel(p)
        plt.xlabel('Iter')
        aymin, aymax = plt.ylim()
        plt.vlines(nburn, aymin, aymax, linestyle=':')
    # plt.suptitle('performance of each parameter as a function of iter',
    #              fontsize=14, y=1.1)
    plt.show()

def plotChain(sam):
    """Histogram the posterior samples of each parameter."""
    f, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
    ax1 = axs[0, 0]
    # h1 = ax1.hist(sam.chain[:, :, 0].reshape((-1, 4)))
    h1 = ax1.hist(sam.chain[:, :, 0].flatten())
    ax1.set_xlabel('A')
    ax1.set_ylabel(r'$\cal L$')
    ax1.set_yticks([])
    ax2 = axs[0, 1]
    h2 = ax2.hist(sam.chain[:, :, 1].flatten())
    ax2.set_xlabel('x')
    ax2.set_ylabel(r'$\cal L$')
    ax2.set_yticks([])
    ax3 = axs[1, 0]
    h3 = ax3.hist(sam.chain[:, :, 2].flatten())
    ax3.set_xlabel('y')
    ax3.set_ylabel(r'$\cal L$')
    ax3.set_yticks([])
    ax4 = axs[1, 1]
    h4 = ax4.hist(sam.chain[:, :, 3].flatten())
    ax4.set_xlabel('R')
    ax4.set_ylabel(r'$\cal L$')
    ax4.set_yticks([])
    f.show()

def MFtemplate(S, posX, posY, thetaX, thetaY, arcsecPerPx, R):
    """
    Template assumed to be the shape of the source before convolving with
    the beam; assumed frequency-independent (or single-frequency) for now.
    Params: {thetaX, thetaY, S, R}
        thetaX, thetaY = position
        S = amplitude
        R = spatial extent
    Assumes a circular 2D Gaussian.

    Inputs:
    -------
    S: float
    posX: int
    posY: int
    thetaX: float
    thetaY: float
    arcsecPerPx: float
    R: float
    Returns:
    --------
    CircularGauss: float
        the flux at (x, y) given some extent of the source
    """
    stuff = - ((posX * arcsecPerPx - thetaX) ** 2 +
               (posY * arcsecPerPx - thetaY) ** 2) / (2. * R ** 2)
    CircularGauss = S * np.exp(stuff)
    return CircularGauss

# ---------------------
path2Counts = '../Counts/'
f_Counts = 'Bethermin_model_counts_250um.txt'
dataCounts = read_data_fromtxt(path2Counts + f_Counts)
amp = getColn(dataCounts, 0)
dnds = getColn(dataCounts, 1)
###############################################
# --- Assignment -------
# a few things (3) we can tweak:
#   - a larger beam becomes confusion-noise dominated
#   - with a larger image size, no source overlaps unless
#     we increase the number of sources as well
###############################################
beam_major_arcsec = 5.0  # arcsec, FWHM
beam_minor_arcsec = 5.0
arcsec2rad = 1 / 206265.
Imsize = 128
arcsecPerPx = 1.0  # 1 px = 1 arcsec
N_s = 1
delta = 0.1
SNR = 10.
max_R = beam_major_arcsec / (2 * np.sqrt(np.log(2)))
beamGauss = PSF_beam(beam_major_arcsec, beam_minor_arcsec)  # arcsec^2
beam_px = arcsecPerPx * beamGauss
effPSF_arcsec2 = Eff_beam(beamGauss)
# ---- Simulate map --------
im = np.zeros((Imsize, Imsize))
for k in range(N_s):
    Ux = random.uniform(0 + delta, 1 - delta)
    thetaX = Imsize * Ux
    Uy = random.uniform(0 + delta, 1 - delta)
    thetaY = Imsize * Uy
    R = random.uniform(1, max_R)  # extent
    flux = stats.powerlaw.rvs(amp.mean()) * 100 * amp.mean()  # say mJy
    print "Simulating source properties: S={:f}, cenx={:f}, ceny={:f}, R={:f}".format(flux, thetaX, thetaY, R)
    im[int(thetaY), int(thetaX)] += flux  # pixel indices must be integers
    kernel_source = Gaussian2DKernel(R)
    kernel_source.normalize()
    im = signal.fftconvolve(im, kernel_source)
    _resize_0 = slice(abs(Imsize - im.shape[0]) / 2,
                      im.shape[0] - abs(Imsize - im.shape[0]) / 2)
    _resize_1 = slice(abs(Imsize - im.shape[1]) / 2,
                      im.shape[1] - abs(Imsize - im.shape[1]) / 2)
    im = im[_resize_0, _resize_1]
kernel = Gaussian2DKernel(beam_major_arcsec * arcsecPerPx)
kernel.normalize()
image = signal.fftconvolve(im, kernel)  # with zero-padding
resize_slice_0 = slice(abs(Imsize - image.shape[0]) / 2,
                       image.shape[0] - abs(Imsize - image.shape[0]) / 2)
resize_slice_1 = slice(abs(Imsize - image.shape[1]) / 2,
                       image.shape[1] - abs(Imsize - image.shape[1]) / 2)
image = image[resize_slice_0, resize_slice_1]  # take out zero-padding
model_noiseSTD = image.max() / SNR
noise = stats.norm.rvs(0.0, model_noiseSTD, size=(Imsize, Imsize))  # mJy
cov_n = np.cov(noise.T)
noisy = image + noise  # zero-mean noise
print "Image dtype: %s" % (noisy.dtype)
print "Image size: %6d" % (noisy.size)
print "Image shape: %3dx%3d" % (noisy.shape[0], noisy.shape[1])
print "Max value %1.2f at pixel %6d" % (noisy.max(), noisy.argmax())
print "Min value %1.2f at pixel %6d" % (noisy.min(), noisy.argmin())
# fig = plt.figure(figsize=(12, 4))
# ax2 = fig.add_subplot(133)
# ax2.imshow(noisy, cmap=plt.cm.rainbow)
# ax3 = fig.add_subplot(131)
# ax3.imshow(im, cmap=plt.cm.rainbow)
# ax5 = fig.add_subplot(132)
# ax5.imshow(image, cmap=plt.cm.rainbow)
# save = raw_input('save figure? y/n')
# if save == 'y':
#     fig.savefig(place + name + 'sim.eps', dpi=600)
# fig.show()
# ------ get noise ---------
sigma_c = abs(conf(effPSF_arcsec2 / (206265.) ** 2))  # Jy
Jy2mJy = 1.e3
print "Confusion: {:.2f} [mJy]".format(sigma_c * Jy2mJy)  # mJy
# Get map noise
sigma_map = abs(np.std(noisy))
print "Map noise: {:.2f} [mJy]".format(sigma_map)
sigma = sigma_c if sigma_c > sigma_map else sigma_map
# Getting filtered positions ---------
threshold5sig = 5 * sigma
threshold3sig = 3 * sigma
mask3sigma = (noisy >= threshold3sig)
# --- sources found above 3 sigma -------
label3sig_im, nb3sig_labels = ndimage.label(mask3sigma)
# ndimage.label returns the labelled image (0 = background, 1..nb3sig_labels
# = candidate islands) and the number of islands found;
# label3sig_im.max() == nb3sig_labels
image_found = np.zeros((noisy.shape[0], noisy.shape[1]))
for i in range(1, nb3sig_labels + 1):
    slice_row, slice_col = ndimage.find_objects(label3sig_im == i)[0]
    # keep only islands with at least one pixel > 5 sigma
    if (noisy[slice_row, slice_col] > threshold5sig).any():
        image_found[slice_row, slice_col] = noisy[slice_row, slice_col]
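
# Minimal ndimage.label illustration (a hypothetical 2x2 example, not
# project data): with the default 4-connectivity, diagonal pixels are NOT
# connected, so two separate islands are found.
_lab_demo, _n_demo = ndimage.label(np.array([[0, 1], [1, 0]]))
# _n_demo == 2 and _lab_demo == [[0, 1], [2, 0]]
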
# just to demonstrate sources found --
# fig = plt.figure()
# ax2 = fig.add_subplot(211)
# ax2.imshow(noisy, cmap=plt.cm.rainbow)
# ax4 = fig.add_subplot(212)
# ax4.imshow(image_found, cmap=plt.cm.rainbow)
# fig.show()
_SrcMsk = mask3sigma
_labelSrc, _nbSrc = label3sig_im, nb3sig_labels
size_min = np.floor(np.sqrt(beam_major_arcsec))
print "** before **"
# print np.unique(_labelSrc)  # remember everything else is 0
print _nbSrc
print "-" * 10
for i in range(1, _nbSrc + 1):
    slice_row, slice_col = ndimage.find_objects(_labelSrc == i)[0]
    # drop islands that are too small
    if ((slice_col.stop - slice_col.start) < size_min) | \
            ((slice_row.stop - slice_row.start) < size_min):
        _labelSrc[slice_row, slice_col] = 0
labelFiltered_arr = np.unique(_labelSrc)
print "** after **"
print len(labelFiltered_arr) - 1
# make a new label image based on the cleaned-up candidates, and re-count
label_clean = np.searchsorted(labelFiltered_arr, _labelSrc)
# make Bool mask
msk_clnSrc = np.where(label_clean != 0, label_clean, 0)
_idx_msk = np.where(msk_clnSrc != 0)
msk_clnSrc[_idx_msk] = 1
nb_clnSrc = np.unique(label_clean)[1:]  # excluding label 0, which is background
# get the centre of mass of each candidate ----
coords = ndimage.center_of_mass(noisy, label_clean, nb_clnSrc)
# coords = ndimage.maximum_position(noisy, label_clean, nb_clnSrc)
xcoords = np.array([k[1] for k in coords])
ycoords = np.array([k[0] for k in coords])
image_cln = np.zeros((noisy.shape[0], noisy.shape[1]))
sizes1 = ndimage.sum(msk_clnSrc, label_clean, nb_clnSrc)  # extent (px per island)
fluxes = ndimage.sum(noisy, label_clean, nb_clnSrc)
data1 = fluxes
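
# ndimage.sum here acts as a label-wise aperture sum: for each label l in
# nb_clnSrc it returns noisy[label_clean == l].sum(). A hedged one-island
# sanity check (guarded in case nothing was detected):
if len(nb_clnSrc) > 0:
    assert np.isclose(fluxes[0], noisy[label_clean == nb_clnSrc[0]].sum())
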
for i in nb_clnSrc:  # for each island
    slice_row, slice_col = ndimage.find_objects(label_clean == i)[0]
    image_cln[slice_row, slice_col] = noisy[slice_row, slice_col]
# fig = plt.figure(figsize=(12, 4))
# ax2 = fig.add_subplot(131)
# ax2.imshow(noisy, cmap=plt.cm.rainbow)
# ax3 = fig.add_subplot(132)
# ax3.imshow(image_found, cmap=plt.cm.rainbow)
# ax4 = fig.add_subplot(133)
# # cleaned up extreme-size sources
# ax4.imshow(image_cln, cmap=plt.cm.rainbow)
# save = raw_input('save figure? y/n')
# if save == 'y':
#     fig.savefig(place + name + 'foundCln.eps', dpi=600)
# fig.show()
# plt.figure()
# plt.imshow(label_clean)
# if save == 'y':
#     plt.savefig(place + name + 'structCln.eps', dpi=600)
# plt.show()
###################################
import emcee
nburn = 100
nwalkers = 500
nsteps = 1000
nthread = 3


def model_new(params, refx, refy):
    """
    Model flux per pixel, using the centre of mass as (refx, refy).
    """
    Flux, posX, posY, extent = params
    mPerPx = MFtemplate(Flux, posX, posY, refx, refy, arcsecPerPx, extent)
    return mPerPx


def lnlike_new(p, tx, ty, y, n):
    """Gaussian log-likelihood (up to a constant)."""
    return -0.5 * np.sum(((y - model_new(p, tx, ty)) / n) ** 2)


def lnprob(pars, refx, refy, y, e):
    """Log-posterior: flat (top-hat) priors on all four parameters."""
    Flux, posX, posY, extent = pars
    if (100 < Flux < 550 and
            (refx - beam_major_arcsec < posX < refx + beam_major_arcsec) and
            (refy - beam_major_arcsec < posY < refy + beam_major_arcsec) and
            0.1 < extent < beamGauss):
        return lnlike_new(pars, refx, refy, y, e)
    else:
        return -np.inf

for i in nb_clnSrc:  # for each island
    slice_row, slice_col = ndimage.find_objects(label_clean == i)[0]
    flux_ls = []
    refx = xcoords[i - 1]  # corresponds to col; counts from 0
    refy = ycoords[i - 1]  # corresponds to row
    print "CM x={:.1f}, y={:.1f}".format(refx, refy)
    R = np.sqrt(sizes1[i - 1]) / 2
    initial = np.array([data1[i - 1], refx, refy, R])
    initial_sig = np.array([20, 2, 2, 1.])
    ndim = len(initial)
    Pre = initializeMCMC()
    pos = Pre.generate_initial_values(ndim, nwalkers, initial, initial_sig)
    for j in range(slice_row.start, slice_row.stop):
        for k in range(slice_col.start, slice_col.stop):
            # for each pixel in an island
            fluxThisPx = noisy[j, k]
            flux_ls.append(fluxThisPx)
    print sum(flux_ls)
    # Using only the 3-sigma island, find the best-fit A, X, Y, R. Note the
    # map is convolved with the beam, so X, Y are more reliable than A and R
    # for a circularly symmetric beam.
    # -- using MLE
    import scipy.optimize as op
    nll = lambda *args: -lnlike_new(*args)
    result = op.minimize(nll, list(initial),
                         args=(refx, refy, flux_ls, sigma))
    # doesn't work so well with the 3-sigma structure: gives OK positions
    # but poor R and amplitude
    res_a, res_x, res_y, res_r = result["x"]
    print result["x"]
    # what about MCMC, imposing priors
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    args=[refx, refy, flux_ls, sigma],
                                    threads=nthread)
    print("Running burn-in...")
    p0, prob, rstate = sampler.run_mcmc(pos, nburn)
    sampler.reset()
    print("Running production...")
    sampler.run_mcmc(p0, nsteps, rstate0=rstate)
    print(" Fit complete")
    print(" Mean acceptance fraction: ", np.mean(sampler.acceptance_fraction))
    acor = sampler.acor
    print(" Autocorrelation time: {:f} {:f} {:f} {:f}".format(*acor))
    chain_iter(sampler, nwalkers, nburn)
    plotChain(sampler)
    # get the best-fit parameters (maximum a posteriori sample)
    bestIdx_flat = sampler.lnprobability[:].argmax()
    idx = np.unravel_index(bestIdx_flat, sampler.lnprobability.shape)
    best_val_allParams = sampler.chain[idx[0], idx[1], :]
    # median +/- 1 sigma from the 16/50/84 percentiles
    samples = sampler.chain[:, :, :].reshape((-1, ndim))
    vals = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
               zip(*np.percentile(samples, [16, 50, 84], axis=0)))
    print "median +up -low per parameter:"
    for vi in range(len(vals)):
        median, up, low = vals[vi]
        print str(median) + ' +' + str(up) + ' -' + str(low)
    print "best: ", best_val_allParams
# ============================================================
# Second script in this gist: likelihood comparison over
# aperture sizes instead of MCMC fitting.
# ============================================================
from matplotlib import pyplot as plt
import numpy as np
import matplotlib
from astropy.io import fits
from scipy import ndimage, stats, interpolate, signal
import random

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.size'] = 18
plt.rcParams['figure.figsize'] = (12, 8)

def power_fit(S, k, gamma):
    """
    Source amplitude distributed as a power law.

    Input:
    ------
    S: float
        flux density, or counts
    k: float
        scaling factor
    gamma: float
        spectral index
    """
    return k * S ** gamma

def read_data_fromtxt(filename):
    """
    Return:
        np.array format
    """
    import astropy.io.ascii
    data = astropy.io.ascii.read(filename, comment='^#')
    if len(data) == 0:
        errstr = "No data read from %s" % filename
        raise IOError(errstr)
    return np.asarray(data)

def getColn(dataCounts, n):
    """
    Return the nth column of the data.

    Input:
        dataCounts: array
        n: int
    Returns:
        data: array
            nth column of dataCounts
    """
    if not isinstance(n, int):
        raise TypeError('n MUST be an integer!')
    data = []
    for i in np.arange(len(dataCounts)):
        data.append(dataCounts[i][n])
    return np.array(data)

def conf(beam, para_gamma=2.5, q=5.):
    """
    Confusion noise due to faint-source fluctuations (see lecture slides).
    q is an arbitrary parameter; 5 is the typical value.
    gamma = 2.5 is the Euclidean value (Condon's best-fit value is ~2.1).

    Input:
    ------
    q: float
        default = 5
    para_gamma: float
        default = 2.5 (Euclidean)
    beam: float
        beam solid angle in sr
    Returns:
    --------
    sigma: float
        confusion sigma
    """
    para_k = 10. ** 4  # depends on dnds; ~4 from Gal project
    sigma = (q ** (3. - para_gamma) / (3 - para_gamma)) \
        ** (1. / (para_gamma - 1)) \
        * (beam * para_k) ** (1. / (para_gamma - 1))
    return sigma

def Eff_beam(beamSolidAngle, gamma=2.5):
    """
    Calculate the effective beam area of the instrument based on Condon 1974;
    depends on gamma. The functional form follows Condon's P(D) approach,
    which computes confusion due to weak sources (fluctuations); gamma is
    implicit here and relates to the number counts dN/dS.

    Input:
    ------
    gamma: float
        default = 2.5 (Euclidean); Condon's best-fit value is ~2.1
    Returns:
    --------
    eff_arcsec2: float
        arcsec^2
    """
    eff_arcsec2 = beamSolidAngle / (gamma - 1)
    return eff_arcsec2

def PSF_beam(major, minor):
    """
    Return the Gaussian beam solid angle in arcsec^2.

    Input:
    ------
    major: float
        arcsec, FWHM
    minor: float
        arcsec, FWHM
    """
    PSF = (np.pi / 4.) * major * minor / np.log(2)
    return PSF

def posErr(sigma, FWHM, threshold):
    """
    Calculate the position accuracy based on the Condon memo:
        sigma_p = sigma_tot * FWHM / (2 * threshold)
    """
    pos_err = sigma * FWHM / (2. * threshold)
    return pos_err

def Ns(snu_ax, dnds):
    """
    Make the distribution of N as a function of S.
    """
    Delta_S = []
    s_last = 0
    for i, s in enumerate(snu_ax):
        Delta_S.append(s - s_last)
        s_last = s
    Delta_S = np.array(Delta_S)
    N = np.zeros(np.shape(dnds))
    for i in range(len(N)):
        N[i] = dnds[i] * Delta_S[i]
    return N

def MFtemplate(S, posX, posY, thetaX, thetaY, arcsecPerPx, R):
    """
    Template assumed to be the shape of the source before convolving with
    the beam; assumed frequency-independent (or single-frequency) for now.
    Params: {thetaX, thetaY, S, R}
        thetaX, thetaY = position
        S = amplitude
        R = spatial extent
    Assumes a circular 2D Gaussian.

    Inputs:
    -------
    S: float
    posX: int
    posY: int
    thetaX: float
    thetaY: float
    arcsecPerPx: float
    R: float
    Returns:
    --------
    CircularGauss: float
        the flux at (x, y) given some extent of the source
    """
    stuff = - ((posX * arcsecPerPx - thetaX) ** 2 +
               (posY * arcsecPerPx - thetaY) ** 2) / (2. * R ** 2)
    CircularGauss = S * np.exp(stuff)
    return CircularGauss

# ---------------------
path2Counts = '../Counts/'
f_Counts = 'Bethermin_model_counts_250um.txt'
dataCounts = read_data_fromtxt(path2Counts + f_Counts)
amp = getColn(dataCounts, 0)
dnds = getColn(dataCounts, 1)
###############################################
# --- Assignment -------
# a few things (3) we can tweak:
#   - a larger beam becomes confusion-noise dominated
#   - with a larger image size, no source overlaps unless
#     we increase the number of sources as well
###############################################
beam_major_arcsec = 5.0  # arcsec, FWHM
beam_minor_arcsec = 5.0
strict = False
arcsec2rad = 1 / 206265.
Imsize = 256
arcsecPerPx = 1.0  # 1 px = 1 arcsec
N_s = 20
delta = 0.1
SNR = 10.
place = './Figures/'
name = 'Ns' + str(N_s) + '_SNR' + str(SNR) + '_Imsize' + str(Imsize) + \
    '_beamFWHM' + str(beam_major_arcsec) + '_strict' + str(strict)
# neighbor_n_L2 = 1  # take the neighbouring 1 px for likelihood method 2 for now
point_source = True
# max_R = 5
max_R = beam_major_arcsec / (2 * np.sqrt(np.log(2)))
if point_source and max_R is None:
    max_R = beam_major_arcsec / (2 * np.sqrt(np.log(2)))
beamGauss = PSF_beam(beam_major_arcsec, beam_minor_arcsec)  # arcsec^2
beam_px = arcsecPerPx * beamGauss
effPSF_arcsec2 = Eff_beam(beamGauss)
# ---- Simulate map --------
im = np.zeros((Imsize, Imsize))
for k in range(N_s):
    Ux = random.uniform(0 + delta, 1 - delta)
    thetaX = Imsize * Ux
    Uy = random.uniform(0 + delta, 1 - delta)
    thetaY = Imsize * Uy
    R = random.uniform(1, max_R)  # extent
    flux = stats.powerlaw.rvs(amp.mean()) * 10 * amp.mean()  # say mJy
    for i in range(Imsize):
        for j in range(Imsize):
            im[i, j] += MFtemplate(flux, i, j, thetaX, thetaY, arcsecPerPx, R)
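# A vectorized equivalent of the per-pixel double loop above (a sketch,
# shown as a comment so the flux is not added twice; MFtemplate broadcasts
# over numpy index grids, matching the (row, col) argument order of the loop):
# rows, cols = np.indices((Imsize, Imsize))
# im += MFtemplate(flux, rows, cols, thetaX, thetaY, arcsecPerPx, R)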
from astropy.convolution import Gaussian2DKernel
kernel = Gaussian2DKernel(beam_major_arcsec * arcsecPerPx)
kernel.normalize()
image = signal.fftconvolve(im, kernel)  # with zero-padding
resize_slice_0 = slice(abs(Imsize - image.shape[0]) / 2,
                       image.shape[0] - abs(Imsize - image.shape[0]) / 2)
resize_slice_1 = slice(abs(Imsize - image.shape[1]) / 2,
                       image.shape[1] - abs(Imsize - image.shape[1]) / 2)
image = image[resize_slice_0, resize_slice_1]  # take out zero-padding
model_noiseSTD = image.max() / SNR
noise = stats.norm.rvs(0.0, model_noiseSTD, size=(Imsize, Imsize))  # mJy
cov_n = np.cov(noise.T)
noisy = image + noise  # zero-mean noise
print "Image dtype: %s" % (noisy.dtype)
print "Image size: %6d" % (noisy.size)
print "Image shape: %3dx%3d" % (noisy.shape[0], noisy.shape[1])
print "Max value %1.2f at pixel %6d" % (noisy.max(), noisy.argmax())
print "Min value %1.2f at pixel %6d" % (noisy.min(), noisy.argmin())
fig = plt.figure(figsize=(12, 4))
ax2 = fig.add_subplot(133)
ax2.imshow(noisy, cmap=plt.cm.rainbow)
ax3 = fig.add_subplot(131)
ax3.imshow(im, cmap=plt.cm.rainbow)
ax5 = fig.add_subplot(132)
ax5.imshow(image, cmap=plt.cm.rainbow)
save = raw_input('save figure? y/n')
if save == 'y':
    fig.savefig(place + name + 'sim.eps', dpi=600)
fig.show()
# ------ get noise ---------
sigma_c = conf(effPSF_arcsec2 / (206265.) ** 2)  # Jy
Jy2mJy = 1.e3
print "Confusion: {:.2f} [mJy]".format(sigma_c * Jy2mJy)  # mJy
# Get map noise
sigma_map = np.std(noisy)
print "Map noise: {:.2f} [mJy]".format(sigma_map)
sigma = sigma_c if sigma_c > sigma_map else sigma_map
# Getting filtered positions ---------
# Jy
threshold5sig = 5 * sigma
threshold3sig = 3 * sigma
# get the Bool grid for where the flux is above threshold
# mask5sigma = (noisy >= threshold5sig)
mask3sigma = (noisy >= threshold3sig)  # & (noisy < threshold5sig)
# --- sources found above 3 sigma -------
label3sig_im, nb3sig_labels = ndimage.label(mask3sigma)
# ndimage.label returns the labelled image (0 = background) and the number
# of islands found; label3sig_im.max() == nb3sig_labels
image_found = np.zeros((noisy.shape[0], noisy.shape[1]))
for i in range(1, nb3sig_labels + 1):
    slice_row, slice_col = ndimage.find_objects(label3sig_im == i)[0]
    # keep only islands with at least one pixel > 5 sigma
    if (noisy[slice_row, slice_col] > threshold5sig).any():
        image_found[slice_row, slice_col] = noisy[slice_row, slice_col]
# just to demonstrate sources found --
# fig = plt.figure()
# ax2 = fig.add_subplot(211)
# ax2.imshow(noisy, cmap=plt.cm.rainbow)
# ax4 = fig.add_subplot(212)
# ax4.imshow(image_found, cmap=plt.cm.rainbow)
# fig.show()
# Refine the mask and label to keep only candidates in which at least one
# pixel is greater than 5 sigma ----
# This turns out to be too strict if the beam is larger than 5 arcsec:
# fluxes get smeared out, and the cut becomes unrealistic.
if strict is True:
    _SrcMsk = (image_found >= threshold5sig)
    _labelSrc, _nbSrc = ndimage.label(_SrcMsk)
else:
    # using 3 sigma is fine
    _SrcMsk = mask3sigma
    _labelSrc, _nbSrc = label3sig_im, nb3sig_labels
# ################################################################
# ---- Clear out single-pixel islands ------
# Based on the fact that the sources I am looking for should smear out to
# more than 1 pixel above 3 sigma.
# WARNING: an upper size limit is invalid because the large beam mixes
# sources together (confusion) -- if only looking for point sources, the
# max size should be the beam size; otherwise set it extremely high,
# i.e. no upper size limit. Hence: no more upper limit on size.
size_min = np.floor(np.sqrt(beam_major_arcsec))
print "** before **"
# print np.unique(_labelSrc)  # remember everything else is 0
print _nbSrc
print "-" * 10
for i in range(1, _nbSrc + 1):
    slice_row, slice_col = ndimage.find_objects(_labelSrc == i)[0]
    # drop islands that are too small
    if ((slice_col.stop - slice_col.start) < size_min) | \
            ((slice_row.stop - slice_row.start) < size_min):
        _labelSrc[slice_row, slice_col] = 0
labelFiltered_arr = np.unique(_labelSrc)
print "** after **"
print len(labelFiltered_arr) - 1
print "Any difference? Note that this can demonstrate confusion \n"
# make a new label image based on the cleaned-up candidates, and re-count
label_clean = np.searchsorted(labelFiltered_arr, _labelSrc)
# make Bool mask
msk_clnSrc = np.where(label_clean != 0, label_clean, 0)
_idx_msk = np.where(msk_clnSrc != 0)
msk_clnSrc[_idx_msk] = 1
nb_clnSrc = np.unique(label_clean)[1:]  # excluding label 0, which is background
def model1(Flux, posX, posY, refx, refy, extent):
    mPerPx = MFtemplate(Flux, posX, posY, refx, refy, arcsecPerPx, extent)
    return mPerPx


def likelihood(m_arr, data_arr, sigma):
    """
    Gaussian ln-likelihood:
        ln L = -N/2 ln(2 pi) - N ln(sigma) - 1/2 sum_i ((d_i - m_i)/sigma)^2
    (fixed: the original squared the sum of residuals instead of summing
    squared residuals, and used 0.5*sigma^2*N instead of N*ln(sigma))
    """
    N = len(m_arr)
    pre_fac = -N / 2. * np.log(2 * np.pi) - N * np.log(sigma)
    diff = np.asarray(data_arr) - np.asarray(m_arr)
    second = -0.5 * np.sum((diff / sigma) ** 2)
    ln_like = pre_fac + second
    return ln_like


def uniformPrior(up, low, norm=False):
    # normalized top-hat if norm=True, else unnormalized (returns 1)
    return 1. / (up - low) if norm else 1.

# This unfortunately cannot be a pure optimization problem, because my
# "data" depend entirely on the threshold (the non-parametric recipe used
# to locate the sources).
# Nor can I use MCMC for the posterior: even though I know the number of
# sources, they span more than a pixel, and since I assume no knowledge of
# how extended the sources can be, there is no way to extract the "data"
# of a source from the image.
# So all I can really do at this point is compare the likelihoods obtained
# using different numbers of neighbouring pixels, to estimate (or justify)
# how likely a neighbouring pixel is to contain part of the flux from the
# identified source.
# get the centre of mass of each candidate ----
coords = ndimage.center_of_mass(noisy, label_clean, nb_clnSrc)
# coords = ndimage.maximum_position(noisy, label_clean, nb_clnSrc)
xcoords = np.array([x[1] for x in coords])  # index 1 is horizontal (col)
ycoords = np.array([y[0] for y in coords])  # index 0 is vertical (row)
image_cln = np.zeros((noisy.shape[0], noisy.shape[1]))
image_cln_neighbor_L2 = np.zeros((noisy.shape[0], noisy.shape[1]))
# case 1: using a square around the structure
sizes1 = ndimage.sum(msk_clnSrc, label_clean, nb_clnSrc)  # extent (px per island)
fluxes = ndimage.sum(noisy, label_clean, nb_clnSrc)
data1 = fluxes
L1 = []
for i in nb_clnSrc:  # for each island
    slice_row, slice_col = ndimage.find_objects(label_clean == i)[0]
    # count the pixels in each island's bounding box
    N_k_1 = abs(slice_row.start - slice_row.stop) * \
        abs(slice_col.start - slice_col.stop)
    model_Island = 0
    model_ls = []
    flux_ls = []
    refx = xcoords[i - 1]  # corresponds to col; counts from 0
    refy = ycoords[i - 1]  # corresponds to row
    dummy = 0
    R = np.sqrt(sizes1[i - 1]) / 2
    for j in range(slice_row.start, slice_row.stop):
        for k in range(slice_col.start, slice_col.stop):
            # for each pixel in an island
            fluxThisPx = noisy[j, k]
            flux_ls.append(fluxThisPx)
            dummy += fluxThisPx
            # model_Island += model1(fluxThisPx, j, k, refy, refx, R)
            # takes care of using the square around the structure
            model_ls.append(model1(fluxThisPx, j, k, refy, refx, R))
            # print fluxThisPx, model_Island
    # print dummy, model_Island, data1[i-1]
    # print N_k_1, int(sizes1[i-1])
    L1.append(likelihood(model_ls, flux_ls, sigma))
    # image that shows the islands used to calculate the likelihood
    image_cln[slice_row, slice_col] = noisy[slice_row, slice_col]
    # L2_slicing_row = slice(slice_row.start - neighbor_n_L2,
    #                        slice_row.stop + neighbor_n_L2)
    # L2_slicing_col = slice(slice_col.start - neighbor_n_L2,
    #                        slice_col.stop + neighbor_n_L2)
    # image_cln_neighbor_L2[L2_slicing_row, L2_slicing_col] = \
    #     noisy[L2_slicing_row, L2_slicing_col]
fig = plt.figure(figsize=(12, 4))
ax2 = fig.add_subplot(131)
ax2.imshow(noisy, cmap=plt.cm.rainbow)
ax3 = fig.add_subplot(132)
ax3.imshow(image_found, cmap=plt.cm.rainbow)
ax4 = fig.add_subplot(133)
# cleaned up extreme-size sources
ax4.imshow(image_cln, cmap=plt.cm.rainbow)
save = raw_input('save figure? y/n')
if save == 'y':
    fig.savefig(place + name + 'foundCln.eps', dpi=600)
fig.show()
plt.figure()
plt.imshow(label_clean)
if save == 'y':
    plt.savefig(place + name + 'structCln.eps', dpi=600)
plt.show()
# It doesn't seem obvious, but this shows a flaw of the method: it will
# identify strong 5-sigma pixels as a separate source even when they could
# be part of one strong source (MFtemplate assumes sources are Gaussians
# spanning several pixels).
# Identifying islands as separate sources is somewhat problematic, because
# we can end up with more sources than there actually are (we know this
# because we simulated the map).
# In theory, one can merge sources that are too close into one, but I think
# this way speeds things up: it doesn't hurt to exclude these pixels as
# long as we are doing aperture photometry. On the other hand, keeping such
# a source costs time in redundant aperture photometry as well as in the
# cross-identification at the end.
# If we didn't know the number of sources and their extent a priori, such
# a pixel could well be another real source under this island-grouping
# method (cross pattern).
###########################
# calculate the likelihood in the following regions and compare:
# 1) taking pixels on the cross structure -- the very preliminary step;
#    how likely?
# 2) DEPRECATED -- taking island pixels on the cross and neighbouring pixels
# 3) taking island pixels and pixels within a radius from the island's CM
###########################
# ---- aperture fluxes on the "toy" image ----
# entirely pixel-based
cropsize = int(3 * np.sqrt(beam_px))
bcg = sigma
L3 = {}
for i, (x, y) in enumerate(zip(xcoords, ycoords)):
    xint = int(x)
    yint = int(y)
    L3_ls = []
    if (x - cropsize < 0) or (y - cropsize < 0) or \
            (x + cropsize > noisy.shape[1]) or (y + cropsize > noisy.shape[0]):
        # near edge of image
        pass
    else:
        # crop a square; no need to subtract the background (bcg) since we
        # take the covariance matrix into account in the likelihood
        # calculation, and we are not doing aperture photometry
        # (i.e. getting intrinsic flux)
        small = noisy[yint - cropsize:yint + cropsize + 1,
                      xint - cropsize:xint + cropsize + 1].copy().astype(np.float64)
        # all indices of the small gridded image
        yind, xind = np.indices(small.shape)
        # assume the centre is the same as the peak pixel (zero-indexed)
        # recentre
        inner_quarter_x = slice(int(np.floor(xind.max() / 4.)),
                                int(np.ceil(xind.max() / 4. * 3)))
        inner_quarter_y = slice(int(np.floor(yind.max() / 4.)),
                                int(np.ceil(yind.max() / 4. * 3)))
        _small = np.zeros((small.shape[0], small.shape[1]))
        _small[inner_quarter_y, inner_quarter_x] = \
            small[inner_quarter_y, inner_quarter_x]
        ycen, xcen = ndimage.measurements.maximum_position(_small)
        # shift the peak to (0, 0) and calculate the radius
        xind -= xcen
        yind -= ycen
        rad = np.sqrt(xind ** 2 + yind ** 2)  # approach 3
        plt.figure(figsize=(20, 6))
        plt.subplot(131)
        plt.imshow(_small)
        plt.subplot(132)
        plt.imshow(small)
        # plt.scatter(xind, yind, color='black', alpha=0.02)
        # plt.scatter(xind + xcen, yind + ycen, color='red', alpha=0.02)
        plt.scatter(xcen, ycen, color='blue')
        # plt.show()
        # calculate the flux in the apertures L3, per radius
        L3_size = range(int(np.ceil(size_min)), int(np.ceil(1.2 * beam_px)))
        for lsize in L3_size:
            model3_ls = []
            mask = rad <= lsize
            cts_ls = list(small[np.where(mask)])
            # counts = small[np.where(mask)].sum()
            idx = np.where(mask)  # tuple of (row, col) index arrays
            for px_XYpair in zip(idx[0], idx[1]):
                h = px_XYpair[1]
                v = px_XYpair[0]
                fluxThisPx = small[v, h]
                model3_ls.append(model1(fluxThisPx, v, h, ycen, xcen, lsize))
            L3_ls.append(likelihood(model3_ls, cts_ls, sigma))
        L3[str(i)] = L3_ls
        plt.subplot(133)
        plt.plot(L3_size, L3_ls)
        plt.plot(np.sqrt(sizes1[i] / np.pi), L1[i], "r*")
        plt.yticks([])
        plt.ylabel(r'$\rm ln\ \cal L$')  # maximizing ln(L) maximizes L
        plt.xlabel('Radial extent [px]')
        # save = raw_input('save figure? y/n')
        if save == 'y':
            plt.savefig(place + name + str(i) + 'L3.eps', dpi=600)
        # plt.show()
# The likelihood is not as distinct if the SNR is low (~< 10); it is
# significantly better if no other source is nearby and the SNR is high.
Not a concern anymore:
---- Clear out extremely small sizes and large sizes ------
"Based on the fact that the sources I am looking for should smear out to
larger than 1 pixel above 3 sigma."
Some random fixes for now; need to fix later!
size_min = 1.25  # currently a "random" pick
# size_min = beam_major_arcsec if beam_major_arcsec < beam_minor_arcsec else beam_minor_arcsec  # if convolving with beam
size_max = 100  # currently a "random" pick...
template: 2R -> 2R^2
Not a concern anymore:
R spans 1-7 px, so the beam size should be a more sensible limit.