@astro313
Last active August 29, 2015 14:19
DataMining Project
from matplotlib import pyplot as plt
import numpy as np
import matplotlib
from astropy.io import fits
from astropy.convolution import Gaussian2DKernel
from scipy import ndimage, stats, interpolate, signal
import random
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.size'] = 18
plt.rcParams['figure.figsize'] = (12, 8)
def power_fit(S, k, gamma):
"""
source amplitude distributed as power law
Input:
------
S: float
flux density, or counts
k: float
scaling factor
gamma: float
spectral index
"""
return k * S ** gamma
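# Hedged example: recover (k, gamma) from toy Euclidean-like counts with
# scipy's curve_fit (the flux grid and normalisation here are invented for
# illustration; the real counts are read from the Bethermin file below).
from scipy.optimize import curve_fit
S_demo = np.logspace(0., 2., 20)
counts_demo = 1.e3 * S_demo ** -2.5
popt_demo, _ = curve_fit(power_fit, S_demo, counts_demo, p0=[1.e3, -2.5])
print "power_fit demo, (k, gamma):", popt_demo  # ~ [1e3, -2.5]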
def read_data_fromtxt(filename):
"""
Return:
np.array format
"""
import astropy.io.ascii
data = astropy.io.ascii.read(filename, comment='^#')
if len(data) == 0:  # check the parsed table, not the filename string
errstr = "No data read from %s" % filename
raise IOError(errstr)
return np.asarray(data)
def getColn(dataCounts, n):
"""
return nth col of data
Input:
data: array
n: int
Returns:
data: array
nth col of data
"""
if not isinstance(n, int):
raise TypeError('n MUST be an integer! ')
data = []
for i in np.arange(len(dataCounts)):
data.append(dataCounts[i][n])
return np.array(data)
def conf(beam, para_gamma=2.5, q=5.):
"""
Confusion noise due to faint-source fluctuations (see lecture slides).
q is an arbitrary parameter; 5 is the typical value.
gamma = 2.5 would be the Euclidean value.
Input:
------
q: float
default = 5
para_gamma: float
default = 2.5 (Euclidean); 2.1 is Condon's best value
beam: float
sr
Returns:
--------
sigma: float
confusion sigma
"""
para_k = 10. ** 4 # dep on dnds, ~ 4 from Gal project
sigma = (q ** (3. - para_gamma) / (3 - para_gamma)) \
** (1. / (para_gamma - 1)) \
* (beam * para_k) ** (1. / (para_gamma - 1))
return sigma
def Eff_beam(beamSolidAngle, gamma=2.5):
"""
Calculate the effective beam area of the instrument based on Condon 1974; depends on gamma.
The functional form follows Condon's P(D) approach, which treats confusion due to weak sources (fluctuations); gamma is the power-law index of the counts dN/dS.
Input:
------
gamma: float
default = 2.5; Condon's best value is 2.1
Returns:
--------
eff_arcsec2: float
arcsec^2
"""
eff_arcsec2 = beamSolidAngle / (gamma - 1)
return eff_arcsec2
def PSF_beam(major, minor):
"""
Returns gaussian beam PSF in arcsec2
Input:
------
major: float
arcsec FWHM
minor: float
arcsec FWHM
"""
PSF = (np.pi / 4.) * major * minor / np.log(2)
return PSF
def posErr(sigma, FWHM, threshold):
"""
Calculate the position accuracy based on the Condon memo:
\sigma_p = \sigma_{tot} * FWHM / (2 * threshold)
"""
pos_err = sigma * FWHM / (2. * threshold)
return pos_err
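# A minimal numeric sketch chaining the four helpers above (the 5 arcsec
# beam matches the defaults used later in this script; q and gamma keep
# their default values):
_beam_demo = PSF_beam(5.0, 5.0)                # ~28.3 arcsec^2
_eff_demo = Eff_beam(_beam_demo)               # ~18.9 arcsec^2
_sigc_demo = conf(_eff_demo / 206265. ** 2)    # confusion sigma [Jy]
print "posErr demo [arcsec]:", posErr(_sigc_demo, 5.0, 5. * _sigc_demo)  # = FWHM/10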
def Ns(snu_ax, dnds):
"""
Make distribution of N as a function of S
"""
# bin widths Delta_S; the first bin is measured from S = 0
Delta_S = np.diff(np.concatenate(([0.], snu_ax)))
return dnds * Delta_S
class initializeMCMC():
def __init__(self):
self.lowlim = np.array([3*sigma, 0, 0, 0.1])
self.uplim = np.array([700, Imsize, Imsize, beamGauss])
def generate_initial_values(self, ndim, nwalkers, initvals, initsigma):
""" Generate a set of nvalues initial parameters.
Parameters
----------
initvals : ndarray
Initial parameter values in order A, x, y, R.
initsigma : ndarray
Sigma values for each parameter.
Returns
-------
p0 : ndarray
A nwalkers x ndim array of initial values.
Notes
-----
This is only interesting because it has to obey parameter limits.
As a consequence, the user-provided initial values may be ignored.
This will take care of fixed parameters correctly.
"""
if len(initvals) != ndim:
raise ValueError("Initial values not expected length")
if len(initsigma) != ndim:
raise ValueError("Initial sigma values not expected length")
# First -- see if any of the initial values lie outside
# the upper/lower limits.
outside_limits = [False] * ndim
for i, val in enumerate(initvals):
#Everything has a lower limit...
if val < self.lowlim[i]:
outside_limits[i] = True
elif val > self.uplim[i]:
outside_limits[i] = True
# Adjust initial values if necessary. We try to keep them
# close to the user provided value, just shifting into the
# range by a few sigma. If the range is too small, just
# stick it in the middle.
int_init = np.zeros(ndim)
for i in range(ndim):
if outside_limits[i]:
par_range = self.uplim[i] - self.lowlim[i]
if par_range <= 0:
errmsg = "Limits on parameter {:d} cross"
raise ValueError(errmsg.format(i))
if 2.0 * initsigma[i] >= par_range:
int_init[i] = self.lowlim[i] + 0.5 * par_range
else:
# Figure out if we are below or above
if initvals[i] < self.lowlim[i]:
int_init[i] = self.lowlim[i] + 2 * initsigma[i]
else:
int_init[i] = self.uplim[i] - 2 * initsigma[i]
else:
int_init[i] = initvals[i]
# Make p0 (output)
p0 = np.zeros((nwalkers, ndim))
maxiters = 100
for i in range(ndim):
lwlim = self.lowlim[i]
hs_uplim = 1
uplim = self.uplim[i]
pvec = initsigma[i] * np.random.randn(nwalkers) + \
int_init[i]
# Now reject and regenerate anything outside limits
if hs_uplim:
badidx = np.logical_or(pvec > uplim,
pvec < lwlim).nonzero()[0]
else:
badidx = np.nonzero(pvec < lwlim)[0]
iters = 0
nbad = len(badidx)
while nbad > 0:
pvec[badidx] = initsigma[i] * np.random.randn(nbad) + \
int_init[i]
iters += 1
if hs_uplim:
badidx = \
np.logical_or(pvec > uplim,
pvec < lwlim).nonzero()[0]
else:
badidx = np.nonzero(pvec < lwlim)[0]
nbad = len(badidx)
if iters > maxiters:
errmsg = "Too many iterations initializing param {:d}"
raise Exception(errmsg.format(i))
p0[:, i] = pvec
return p0
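# Minimal usage sketch of generate_initial_values (sigma, Imsize and
# beamGauss are assigned further down in this script; the placeholder
# values here exist only so the snippet runs standalone, and the initial
# values/sigmas are illustrative):
sigma, Imsize, beamGauss = 1.0, 128, 28.3
_p0_demo = initializeMCMC().generate_initial_values(4, 8, np.array([200., 64., 64., 2.]), np.array([20., 2., 2., 1.]))
print "init demo, p0 shape:", _p0_demo.shape  # (8, 4); every column obeys lowlim/uplim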
def chain_iter(sam, nwalkers, nburn):
par = ('A', 'x', 'y', 'R')
plt.figure(figsize=[20, 10])
for i, p in enumerate(par):
ncols = (len(par)/2 + 1) if len(par)%2 != 0 else len(par)/2
plt.subplot(len(par) / 2, ncols, i + 1)
for w in range(nwalkers):
plt.plot(np.arange(sam.chain.shape[1]), sam.chain[w, :, i], 'b-', alpha=0.1)
plt.ylabel(p)
plt.xlabel('Iter')
aymin, aymax = plt.ylim()
plt.vlines(nburn, aymin, aymax, linestyle=':')
# plt.suptitle('performance of each paramater as a function of iter', fontsize=14, y=1.1)
plt.show()
def plotChain(sam):
f, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
ax1 = axs[0, 0]
# h1 = ax1.hist(sam.chain[:,:,0].reshape((-1, 4)))
h1 = ax1.hist(sam.chain[:, :, 0].flatten())
ax1.set_xlabel('A')
ax1.set_ylabel(r'$\cal L$')
ax1.set_yticks([])
ax2 = axs[0, 1]
h2 = ax2.hist(sam.chain[:,:,1].flatten())
ax2.set_xlabel('x ')
ax2.set_ylabel(r'$\cal L$')
ax2.set_yticks([])
ax3 = axs[1, 0]
h3 = ax3.hist(sam.chain[:,:,2].flatten())
ax3.set_xlabel('y')
ax3.set_ylabel(r'$\cal L$')
ax3.set_yticks([])
ax4 = axs[1, 1]
h4 = ax4.hist(sam.chain[:,:,3].flatten())
ax4.set_xlabel('R')
ax4.set_ylabel(r'$\cal L$')
ax4.set_yticks([])
f.show()
def MFtemplate(S, posX, posY, thetaX, thetaY, arcsecPerPx, R):
"""
The template is assumed to be the source shape before convolution with the beam, and frequency-independent (i.e. single-frequency) for now.
Params: {thetaX, thetaY, S, R}
thetaX, thetaY = position
S = amplitude
R = spatial extent
Assuming 2D gaussian
Inputs:
-------
S: float
posX: int
posY: int
thetaX: float
thetaY: float
arcsecPerPx: float
R: float
Returns:
--------
CircularGauss: float
the flux at (x,y) given some extent of source
"""
stuff = - ((posX*arcsecPerPx - thetaX) ** 2 + (posY*arcsecPerPx - thetaY) ** 2) / (2. * R**2)
CircularGauss = S * np.exp(stuff)
return CircularGauss
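# Hedged demo of MFtemplate: evaluate the circular Gaussian on a small
# grid (the grid size and source parameters below are illustrative only;
# posX/posY may be arrays, so the call is vectorised):
_yy, _xx = np.indices((9, 9))
_tmpl_demo = MFtemplate(1.0, _xx, _yy, 4.0, 4.0, 1.0, 1.5)
print "MFtemplate demo, peak:", _tmpl_demo.max()  # 1.0 at pixel (4, 4)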
# ---------------------
path2Counts = '../Counts/'
f_Counts = 'Bethermin_model_counts_250um.txt'
dataCounts = read_data_fromtxt(path2Counts + f_Counts)
amp = getColn(dataCounts, 0)
dnds = getColn(dataCounts, 1)
###############################################
# --- Assignment -------
# three things we can tweak:
# - a larger beam becomes confusion-noise dominated
# - with a larger image size, no sources overlap unless
#   we also increase the number of sources
###############################################
beam_major_arcsec = 5.0 # arcsec, FWHM
beam_minor_arcsec = 5.0
arcsec2rad = 1 / 206265.
Imsize = 128
arcsecPerPx = 1.0 # 1 px = 1 arcsec
N_s = 1
delta = 0.1
SNR = 10.
max_R = beam_major_arcsec / (2 * np.sqrt(np.log(2)))
beamGauss = PSF_beam(beam_major_arcsec, beam_minor_arcsec) # arcsec^2
beam_px = arcsecPerPx * beamGauss
effPSF_arcsec2 = Eff_beam(beamGauss)
# ---- Simulate map --------
im = np.zeros((Imsize, Imsize))
for k in range(N_s):
Ux = random.uniform(0+delta, 1-delta)
thetaX = Imsize * Ux
Uy = random.uniform(0+delta, 1-delta)
thetaY = Imsize * Uy
R = random.uniform(1, max_R) # extent
flux = stats.powerlaw.rvs(amp.mean()) * 100 * amp.mean() # say mJy
print "Simulating source properties: S={:f}, cenx={:f}, ceny={:f}, R={:f}".format(flux, thetaX, thetaY, R)
im[int(thetaY), int(thetaX)] += flux  # nearest-pixel placement; indices must be ints
kernel_source = Gaussian2DKernel(R)
kernel_source.normalize()
im = signal.fftconvolve(im, kernel_source)
_resize_0 = slice(abs(Imsize-im.shape[0])/2, im.shape[0]-abs(Imsize-im.shape[0])/2)
_resize_1 = slice(abs(Imsize-im.shape[1])/2, im.shape[1]-abs(Imsize-im.shape[1])/2)
im = im[_resize_0, _resize_1]
kernel = Gaussian2DKernel(beam_major_arcsec * arcsecPerPx)
kernel.normalize()
image = signal.fftconvolve(im, kernel) # with zero-padding
resize_slice_0 = slice(abs(Imsize-image.shape[0])/2, image.shape[0]-abs(Imsize-image.shape[0])/2)
resize_slice_1 = slice(abs(Imsize-image.shape[1])/2, image.shape[1]-abs(Imsize-image.shape[1])/2)
image = image[resize_slice_0, resize_slice_1] # take out zero-padding
model_noiseSTD = image.max() / SNR
noise = stats.norm.rvs(0.0, model_noiseSTD, size=(Imsize, Imsize)) # mJy
cov_n = np.cov(noise.T)
noisy = image + noise # zero-mean noise
print "Image dtype: %s" % (noisy.dtype)
print "Image size: %6d" % (noisy.size)
print "Image shape: %3dx%3d" % (noisy.shape[0], noisy.shape[1])
print "Max value %1.2f at pixel %6d" % (noisy.max(), noisy.argmax())
print "Min value %1.2f at pixel %6d" % (noisy.min(), noisy.argmin())
# fig = plt.figure(figsize=(12, 4))
# ax2 = fig.add_subplot(133)
# ax2.imshow(noisy, cmap=plt.cm.rainbow)
# ax3 = fig.add_subplot(131)
# ax3.imshow(im, cmap=plt.cm.rainbow)
# ax5 = fig.add_subplot(132)
# ax5.imshow(image, cmap=plt.cm.rainbow)
# save = raw_input('save figure? y/n')
# if save == 'y':
# fig.savefig(place+name+'sim.eps', dvi=600)
# fig.show()
# ------ get noise ---------
sigma_c = abs(conf(effPSF_arcsec2/(206265.)**2)) # Jy
Jy2mJy = 1.e3
print "Confusion: {:.2f} [mJy]".format(sigma_c * Jy2mJy) # mJy
# Get map noise
sigma_map = abs(np.std(noisy))
print "Map noise: {:.2f} [mJy]".format(sigma_map)
sigma = sigma_c if sigma_c > sigma_map else sigma_map
# Getting filtered positions ---------
threshold5sig = 5 * sigma
threshold3sig = 3 * sigma
mask3sigma = (noisy >= threshold3sig)
# --- source found above 3 sigma-------
label3sig_im, nb3sig_labels = ndimage.label(mask3sigma)
# nb3sig_labels = number of islands found; label3sig_im assigns each pixel its island label (0 = background); label3sig_im.max() == nb3sig_labels
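# Toy illustration of the labelling step (mask values invented): two
# disjoint groups of True pixels yield two labelled islands, and
# find_objects returns the bounding slices of each island.
_toy_mask = np.array([[0, 1, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=bool)
_toy_lab, _toy_n = ndimage.label(_toy_mask)
print "label demo:", _toy_n, ndimage.find_objects(_toy_lab == 1)  # 2 islands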
image_found = np.zeros((noisy.shape[0], noisy.shape[1]))
for i in range(1, nb3sig_labels+1):
slice_row, slice_col = ndimage.find_objects(label3sig_im == i)[0]
# and has at least one pixel > 5 sigma
if (noisy[slice_row, slice_col] > threshold5sig).any():
image_found[slice_row, slice_col] = noisy[slice_row, slice_col]
# just to demonstrate source found --
# fig = plt.figure()
# ax2 = fig.add_subplot(211)
# ax2.imshow(noisy, cmap=plt.cm.rainbow)
# ax4 = fig.add_subplot(212)
# ax4.imshow(image_found, cmap=plt.cm.rainbow)
# fig.show()
_SrcMsk = mask3sigma
_labelSrc, _nbSrc = label3sig_im, nb3sig_labels
size_min = np.floor(np.sqrt(beam_major_arcsec))
print "** before **"
#print np.unique(_labelSrc) # remember everything else is 0
print _nbSrc
print "-"*10
for i in range(1, _nbSrc+1):
slice_row, slice_col = ndimage.find_objects(_labelSrc == i)[0]
# if too small
if ((slice_col.stop - slice_col.start) < size_min) | ((slice_row.stop - slice_row.start) < size_min):
_labelSrc[slice_row, slice_col] = 0
labelFiltered_arr = np.unique(_labelSrc)
print "** after **"
print len(labelFiltered_arr) - 1
# make a new label based on the cleaned up candidates, and re-count
label_clean = np.searchsorted(labelFiltered_arr, _labelSrc)
# make Bool mask
msk_clnSrc = np.where(label_clean != 0, label_clean, 0)
_idx_msk = np.where(msk_clnSrc != 0)
msk_clnSrc[_idx_msk] = 1
nb_clnSrc = np.unique(label_clean)[1:] # excluding those with label-0, which is not a source
# get CM of each candidates ----
coords = ndimage.center_of_mass(noisy, label_clean, nb_clnSrc)
# coords = ndimage.maximum_position(noisy, label_clean, nb_clnSrc)
xcoords = np.array([k[1] for k in coords])
ycoords = np.array([k[0] for k in coords])
image_cln = np.zeros((noisy.shape[0], noisy.shape[1]))
sizes1 = ndimage.sum(msk_clnSrc, label_clean, nb_clnSrc) # extent
fluxes = ndimage.sum(noisy, label_clean, nb_clnSrc)
data1 = fluxes
for i in nb_clnSrc: # for each island
slice_row, slice_col = ndimage.find_objects(label_clean == i)[0]
image_cln[slice_row, slice_col] = noisy[slice_row, slice_col]
# fig = plt.figure(figsize=(12, 4))
# ax2 = fig.add_subplot(131)
# ax2.imshow(noisy, cmap=plt.cm.rainbow)
# ax3 = fig.add_subplot(132)
# ax3.imshow(image_found, cmap=plt.cm.rainbow)
# ax4 = fig.add_subplot(133)
# # cleaned up extreme sizes source
# ax4.imshow(image_cln, cmap=plt.cm.rainbow)
# save = raw_input('save figure? y/n')
# if save == 'y':
# fig.savefig(place+name+'foundCln.eps', dvi=600)
# fig.show()
# plt.figure()
# plt.imshow(label_clean)
# if save == 'y':
# plt.savefig(place+name+'structCln.eps', dvi=600)
# plt.show()
###################################
import emcee
nburn = 100
nwalkers = 500
nsteps = 1000
nthread = 3
def model_new(params, refx, refy):
"""
using CM as refx, refy
"""
Flux, posX, posY, extent = params
mPerPx = MFtemplate(Flux, posX, posY, refx, refy, arcsecPerPx, extent)
return mPerPx
def lnlike_new(p, tx, ty, y, n):
y = np.asarray(y)  # island fluxes may arrive as a plain list
return -0.5 * np.sum(((y - model_new(p, tx, ty)) / n) ** 2)
def lnprob(pars, refx, refy, y, e):
Flux, posX, posY, extent = pars
if (100 < Flux < 550 and (refx - beam_major_arcsec < posX < refx + beam_major_arcsec) and (refy - beam_major_arcsec < posY < refy + beam_major_arcsec) and 0.1 < extent < beamGauss):
return lnlike_new(pars, refx, refy, y, e)
else:
return -np.inf
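# Sanity check of the flat prior box in lnprob (toy numbers; a real call
# uses an island's centre of mass and its pixel fluxes): parameters inside
# the box give a finite log-probability, parameters outside give -inf.
print lnprob(np.array([300., 60., 60., 1.]), 60., 60., np.array([300.]), 1.)  # 0.0
print lnprob(np.array([1.e4, 60., 60., 1.]), 60., 60., np.array([300.]), 1.)  # -inf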
for i in nb_clnSrc: # for each island
slice_row, slice_col = ndimage.find_objects(label_clean == i)[0]
flux_ls = []
refx = xcoords[i-1] # corresponds to col, counts from 0
refy = ycoords[i-1] # corresponds to row
print "CM x={:.1f}, y={:.1f}".format(refx, refy)
R = np.sqrt(sizes1[i-1])/2
initial = np.array([data1[i-1], refx, refy, R])
initial_sig = np.array([20, 2, 2, 1.])
ndim = len(initial)
Pre = initializeMCMC()
pos = Pre.generate_initial_values(ndim, nwalkers, initial, initial_sig)
for j in range(slice_row.start, slice_row.stop):
for k in range(slice_col.start, slice_col.stop):
# for each pixel in an Island
fluxThisPx = noisy[j, k]
flux_ls.append(fluxThisPx)
print sum(flux_ls)
# accounting for just the >3 sigma pixels, find the best-fit A, X, Y, R; note the map is convolved with the beam, so X, Y are more reliable than A, R for a circularly symmetric beam
#using MLE
import scipy.optimize as op
nll = lambda *args: -lnlike_new(*args)
result = op.minimize(nll, list(initial), args=(refx, refy, flux_ls, sigma))
res_a, res_x, res_y, res_r = result["x"] # doesn't work so well with the 3sigma structure, gives ok positions but crappy R and Amplitude
print result["x"]
# what about mcmc, imposing priors
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=[refx, refy, flux_ls, sigma], threads=nthread)
print("Running burn-in...")
p0, prob, rstate = sampler.run_mcmc(pos, nburn)
sampler.reset()
print("Running production...")
sampler.run_mcmc(p0, nsteps, rstate0=rstate)
print(" Fit complete")
print(" Mean acceptance fraction: ", np.mean(sampler.acceptance_fraction))
acor = sampler.acor
print(" Autocorrelation time: {:f} {:f} {:f} {:f}").format(*acor)
chain_iter(sampler, nwalkers, nburn)
plotChain(sampler)
# get best-param
bestIdx_flat = sampler.lnprobability[:].argmax()
idx = np.unravel_index(bestIdx_flat, sampler.lnprobability.shape)
best_val_allParams = sampler.chain[idx[0], idx[1], :]
# mean +/-
samples = sampler.chain[:, :, :].reshape((-1, ndim))
vals = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]), zip(*np.percentile(samples, [16, 50, 84], axis=0)))
for i in range(len(vals)):
med, up, low = vals[i]  # (median, +err, -err) from the 16/50/84 percentiles
print "{0} +{1} -{2}".format(med, up, low)
print "best: ", best_val_allParams
print "median +/- errors above"
from matplotlib import pyplot as plt
import numpy as np
import matplotlib
from astropy.io import fits
from scipy import ndimage, stats, interpolate, signal
import random
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.size'] = 18
plt.rcParams['figure.figsize'] = (12, 8)
def power_fit(S, k, gamma):
"""
source amplitude distributed as power law
Input:
------
S: float
flux density, or counts
k: float
scaling factor
gamma: float
spectral index
"""
return k * S ** gamma
def read_data_fromtxt(filename):
"""
Return:
np.array format
"""
import astropy.io.ascii
data = astropy.io.ascii.read(filename, comment='^#')
if len(data) == 0:  # check the parsed table, not the filename string
errstr = "No data read from %s" % filename
raise IOError(errstr)
return np.asarray(data)
def getColn(dataCounts, n):
"""
return nth col of data
Input:
data: array
n: int
Returns:
data: array
nth col of data
"""
if not isinstance(n, int):
raise TypeError('n MUST be an integer! ')
data = []
for i in np.arange(len(dataCounts)):
data.append(dataCounts[i][n])
return np.array(data)
def conf(beam, para_gamma=2.5, q=5.):
"""
Confusion noise due to faint-source fluctuations (see lecture slides).
q is an arbitrary parameter; 5 is the typical value.
gamma = 2.5 would be the Euclidean value.
Input:
------
q: float
default = 5
para_gamma: float
default = 2.5 (Euclidean); 2.1 is Condon's best value
beam: float
sr
Returns:
--------
sigma: float
confusion sigma
"""
para_k = 10. ** 4 # dep on dnds, ~ 4 from Gal project
sigma = (q ** (3. - para_gamma) / (3 - para_gamma)) \
** (1. / (para_gamma - 1)) \
* (beam * para_k) ** (1. / (para_gamma - 1))
return sigma
def Eff_beam(beamSolidAngle, gamma=2.5):
"""
Calculate the effective beam area of the instrument based on Condon 1974; depends on gamma.
The functional form follows Condon's P(D) approach, which treats confusion due to weak sources (fluctuations); gamma is the power-law index of the counts dN/dS.
Input:
------
gamma: float
default = 2.5; Condon's best value is 2.1
Returns:
--------
eff_arcsec2: float
arcsec^2
"""
eff_arcsec2 = beamSolidAngle / (gamma - 1)
return eff_arcsec2
def PSF_beam(major, minor):
"""
Returns gaussian beam PSF in arcsec2
Input:
------
major: float
arcsec FWHM
minor: float
arcsec FWHM
"""
PSF = (np.pi / 4.) * major * minor / np.log(2)
return PSF
def posErr(sigma, FWHM, threshold):
"""
Calculate the position accuracy based on the Condon memo:
\sigma_p = \sigma_{tot} * FWHM / (2 * threshold)
"""
pos_err = sigma * FWHM / (2. * threshold)
return pos_err
def Ns(snu_ax, dnds):
"""
Make distribution of N as a function of S
"""
# bin widths Delta_S; the first bin is measured from S = 0
Delta_S = np.diff(np.concatenate(([0.], snu_ax)))
return dnds * Delta_S
def MFtemplate(S, posX, posY, thetaX, thetaY, arcsecPerPx, R):
"""
The template is assumed to be the source shape before convolution with the beam, and frequency-independent (i.e. single-frequency) for now.
Params: {thetaX, thetaY, S, R}
thetaX, thetaY = position
S = amplitude
R = spatial extent
Assuming 2D gaussian
Inputs:
-------
S: float
posX: int
posY: int
thetaX: float
thetaY: float
arcsecPerPx: float
R: float
Returns:
--------
CircularGauss: float
the flux at (x,y) given some extent of source
"""
stuff = - ((posX*arcsecPerPx - thetaX) ** 2 + (posY*arcsecPerPx - thetaY) ** 2) / (2. * R**2)
CircularGauss = S * np.exp(stuff)
return CircularGauss
# ---------------------
path2Counts = '../Counts/'
f_Counts = 'Bethermin_model_counts_250um.txt'
dataCounts = read_data_fromtxt(path2Counts + f_Counts)
amp = getColn(dataCounts, 0)
dnds = getColn(dataCounts, 1)
###############################################
# --- Assignment -------
# three things we can tweak:
# - a larger beam becomes confusion-noise dominated
# - with a larger image size, no sources overlap unless
#   we also increase the number of sources
###############################################
beam_major_arcsec = 5.0 # arcsec, FWHM
beam_minor_arcsec = 5.0
strict = False
arcsec2rad = 1 / 206265.
Imsize = 256
arcsecPerPx = 1.0 # 1 px = 1 arcsec
N_s = 20
delta = 0.1
SNR = 10.
place = './Figures/'
name = 'Ns'+str(N_s)+'_SNR'+str(SNR)+'_Imsize'+str(Imsize)+'_beamFWHM'+str(beam_major_arcsec)+'_strict'+str(strict)
# neighbor_n_L2 = 1 # take the neighboring 1 px for likelihood method 2 for now
point_source = True
#max_R = 5
max_R = beam_major_arcsec / (2 * np.sqrt(np.log(2)))
if point_source and max_R is None:
max_R = beam_major_arcsec / (2 * np.sqrt(np.log(2)))
beamGauss = PSF_beam(beam_major_arcsec, beam_minor_arcsec) # arcsec^2
beam_px = arcsecPerPx * beamGauss
effPSF_arcsec2 = Eff_beam(beamGauss)
# ---- Simulate map --------
im = np.zeros((Imsize, Imsize))
for k in range(N_s):
Ux = random.uniform(0+delta, 1-delta)
thetaX = Imsize * Ux
Uy = random.uniform(0+delta, 1-delta)
thetaY = Imsize * Uy
R = random.uniform(1, max_R) # extent
flux = stats.powerlaw.rvs(amp.mean()) * 10 * amp.mean() # say mJy
for i in range(Imsize):
for j in range(Imsize):
im[i, j] += MFtemplate(flux, i, j, thetaX, thetaY, arcsecPerPx, R)
from astropy.convolution import Gaussian2DKernel
kernel = Gaussian2DKernel(beam_major_arcsec * arcsecPerPx)
kernel.normalize()
image = signal.fftconvolve(im, kernel) # with zero-padding
resize_slice_0 = slice(abs(Imsize-image.shape[0])/2, image.shape[0]-abs(Imsize-image.shape[0])/2)
resize_slice_1 = slice(abs(Imsize-image.shape[1])/2, image.shape[1]-abs(Imsize-image.shape[1])/2)
image = image[resize_slice_0, resize_slice_1] # take out zero-padding
model_noiseSTD = image.max() / SNR
noise = stats.norm.rvs(0.0, model_noiseSTD, size=(Imsize, Imsize)) # mJy
cov_n = np.cov(noise.T)
noisy = image + noise # zero-mean noise
print "Image dtype: %s" % (noisy.dtype)
print "Image size: %6d" % (noisy.size)
print "Image shape: %3dx%3d" % (noisy.shape[0], noisy.shape[1])
print "Max value %1.2f at pixel %6d" % (noisy.max(), noisy.argmax())
print "Min value %1.2f at pixel %6d" % (noisy.min(), noisy.argmin())
fig = plt.figure(figsize=(12, 4))
ax2 = fig.add_subplot(133)
ax2.imshow(noisy, cmap=plt.cm.rainbow)
ax3 = fig.add_subplot(131)
ax3.imshow(im, cmap=plt.cm.rainbow)
ax5 = fig.add_subplot(132)
ax5.imshow(image, cmap=plt.cm.rainbow)
save = raw_input('save figure? y/n')
if save == 'y':
fig.savefig(place+name+'sim.eps', dvi=600)
fig.show()
# ------ get noise ---------
sigma_c = conf(effPSF_arcsec2/(206265.)**2) # Jy
Jy2mJy = 1.e3
print "Confusion: {:.2f} [mJy]".format(sigma_c * Jy2mJy) # mJy
# Get map noise
sigma_map = np.std(noisy)
print "Map noise: {:.2f} [mJy]".format(sigma_map)
sigma = sigma_c if sigma_c > sigma_map else sigma_map
# Getting filtered positions ---------
# Jy
threshold5sig = 5 * sigma
threshold3sig = 3 * sigma
# get the Bool grid for where the flux is above threshold
# mask5sigma = (noisy >= threshold5sig)
mask3sigma = (noisy >= threshold3sig) # & (noisy < threshold5sig)
# --- source found above 3 sigma-------
label3sig_im, nb3sig_labels = ndimage.label(mask3sigma)
# nb3sig_labels = number of islands found; label3sig_im assigns each pixel its island label (0 = background); label3sig_im.max() == nb3sig_labels
image_found = np.zeros((noisy.shape[0], noisy.shape[1]))
for i in range(1, nb3sig_labels+1):
slice_row, slice_col = ndimage.find_objects(label3sig_im == i)[0]
# and has at least one pixel > 5 sigma
if (noisy[slice_row, slice_col] > threshold5sig).any():
image_found[slice_row, slice_col] = noisy[slice_row, slice_col]
# just to demonstrate source found --
# fig = plt.figure()
# ax2 = fig.add_subplot(211)
# ax2.imshow(noisy, cmap=plt.cm.rainbow)
# ax4 = fig.add_subplot(212)
# ax4.imshow(image_found, cmap=plt.cm.rainbow)
# fig.show()
# refine mask and label to keep only candidates where at least one pixel is greater than 5 sigma ----
# this turns out to be too strict if the beam is larger than ~5 arcsec: fluxes are smeared out and the cut becomes unrealistic
if strict is True:
_SrcMsk = (image_found >= threshold5sig)
_labelSrc, _nbSrc = ndimage.label(_SrcMsk)
else:
# use 3 sigma is fine
_SrcMsk = mask3sigma
_labelSrc, _nbSrc = label3sig_im, nb3sig_labels
# ################################################################
# ---- Clear out extremely single pixel islands------
# Based on the fact that the sources I am looking for should smear out to more than 1 pixel above 3 sigma.
# WARNING: an upper size limit is invalid because of confusion: a large beam mixes sources together --> if only looking for point sources, max_size should be the beam size; otherwise set it extremely high (~ no upper size limit)
# NO more upper limit on size
size_min = np.floor(np.sqrt(beam_major_arcsec))
print "** before **"
#print np.unique(_labelSrc) # remember everything else is 0
print _nbSrc
print "-"*10
for i in range(1, _nbSrc+1):
slice_row, slice_col = ndimage.find_objects(_labelSrc == i)[0]
# if too small
if ((slice_col.stop - slice_col.start) < size_min) | ((slice_row.stop - slice_row.start) < size_min):
_labelSrc[slice_row, slice_col] = 0
labelFiltered_arr = np.unique(_labelSrc)
print "** after **"
print len(labelFiltered_arr) - 1
print "Any difference? Note that this can demonstrate confusion \n"
# make a new label based on the cleaned up candidates, and re-count
label_clean = np.searchsorted(labelFiltered_arr, _labelSrc)
# make Bool mask
msk_clnSrc = np.where(label_clean != 0, label_clean, 0)
_idx_msk = np.where(msk_clnSrc != 0)
msk_clnSrc[_idx_msk] = 1
nb_clnSrc = np.unique(label_clean)[1:] # excluding those with label-0, which is not a source
def model1(Flux, posX, posY, refx, refy, extent):
mPerPx = MFtemplate(Flux, posX, posY, refx, refy, arcsecPerPx, extent)
return mPerPx
def likelihood(m_arr, data_arr, sigma):
# Gaussian ln-likelihood: -N/2 ln(2 pi) - N ln(sigma) - 0.5 sum(((d - m)/sigma)^2)
m_arr = np.asarray(m_arr)
data_arr = np.asarray(data_arr)
pre_fac = -0.5 * len(m_arr) * np.log(2. * np.pi) - len(m_arr) * np.log(sigma)
second = -0.5 * np.sum(((data_arr - m_arr) / sigma) ** 2)
ln_like = pre_fac + second
return ln_like
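# Cross-check of the Gaussian ln-likelihood against scipy on toy numbers
# (the arrays below are invented for illustration):
_m_demo = np.array([1.0, 2.0, 3.0])
_d_demo = np.array([1.1, 1.9, 3.2])
print "likelihood demo:", likelihood(_m_demo, _d_demo, 0.5)
print "scipy logpdf:   ", stats.norm.logpdf(_d_demo, loc=_m_demo, scale=0.5).sum()  # should match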
def uniformPrior(up, low, norm=False):
return 1. / (up - low) if norm else 1.
## This unfortunately cannot be treated as a pure optimization problem, because my "data" depend entirely on the threshold (the non-parametric form used to locate the sources).
## Nor can I use MCMC for the posterior: even though I know the number of sources, they span more than a pixel, and since I assume no knowledge of how extended the sources can be, there is no way to extract the "data" of a source from the image.
# So all I can really do at this point is compare the likelihoods obtained using different numbers of neighboring pixels, to estimate (or justify) how likely it is that a neighboring pixel contains part of the flux from the identified sources.
# get CM of each candidates ----
coords = ndimage.center_of_mass(noisy, label_clean, nb_clnSrc)
# coords = ndimage.maximum_position(noisy, label_clean, nb_clnSrc)
xcoords = np.array([x[1] for x in coords])  # index 1 is horizontal (column)
ycoords = np.array([y[0] for y in coords])  # index 0 is vertical (row)
image_cln = np.zeros((noisy.shape[0], noisy.shape[1]))
image_cln_neighbor_L2 = np.zeros((noisy.shape[0], noisy.shape[1]))
# # case 1: using square around structure
sizes1 = ndimage.sum(msk_clnSrc, label_clean, nb_clnSrc) # extent
fluxes = ndimage.sum(noisy, label_clean, nb_clnSrc)
data1 = fluxes
L1 = []
for i in nb_clnSrc: # for each island
slice_row, slice_col = ndimage.find_objects(label_clean == i)[0]
# count the pix in each island
N_k_1 = abs(slice_row.start - slice_row.stop) * abs(slice_col.start - slice_col.stop)
model_Island = 0
model_ls = []
flux_ls = []
refx = xcoords[i-1] # corresponds to col, counts from 0
refy = ycoords[i-1] # corresponds to row
dummy = 0
R = np.sqrt(sizes1[i-1])/2
for j in range(slice_row.start, slice_row.stop):
for k in range(slice_col.start, slice_col.stop):
# for each pixel in an Island
fluxThisPx = noisy[j, k]
flux_ls.append(fluxThisPx)
dummy += fluxThisPx
# model_Island += model1(fluxThisPx, j, k, refy, refx, R)
model_ls.append(model1(fluxThisPx, j, k, refy, refx, R)) # takes care of using square around structure
# print fluxThisPx, model_Island
# print
# print dummy, model_Island, data1[i-1]
# print N_k_1, int(sizes1[i-1])
L1.append(likelihood(model_ls, flux_ls, sigma))
# get image that demonstrates Islands that I am using to calculate likelihood
image_cln[slice_row, slice_col] = noisy[slice_row, slice_col]
# L2_slicing_row = slice(slice_row.start-neighbor_n_L2, slice_row.stop+neighbor_n_L2)
# L2_slicing_col = slice(slice_col.start-neighbor_n_L2, slice_col.stop+neighbor_n_L2)
# image_cln_neighbor_L2[L2_slicing_row, L2_slicing_col] = noisy[L2_slicing_row, L2_slicing_col]
fig = plt.figure(figsize=(12, 4))
ax2 = fig.add_subplot(131)
ax2.imshow(noisy, cmap=plt.cm.rainbow)
ax3 = fig.add_subplot(132)
ax3.imshow(image_found, cmap=plt.cm.rainbow)
ax4 = fig.add_subplot(133)
# cleaned up extreme sizes source
ax4.imshow(image_cln, cmap=plt.cm.rainbow)
save = raw_input('save figure? y/n')
if save == 'y':
fig.savefig(place+name+'foundCln.eps', dvi=600)
fig.show()
plt.figure()
plt.imshow(label_clean)
if save == 'y':
plt.savefig(place+name+'structCln.eps', dvi=600)
plt.show()
# It doesn't seem obvious, but this shows a flaw of the method: it can identify strong >5 sigma pixels as a separate source even though they could be part of one strong source (the MF template assumes sources are Gaussian over several pixels).
# Identifying the islands as separate sources is somewhat problematic because we can end up with more sources than there actually are (we know this because we simulated the map).
# In theory one could group sources that are too close into one, but I think this way speeds things up, since it doesn't hurt to exclude these pixels as long as we are doing aperture photometry. On the other hand, keeping the source costs time on redundant aperture photometry as well as cross-identification at the end.
# If we didn't know the number of sources and their extents a priori, the pixel could plausibly be another real source under this island-grouping method (cross pattern).
###########################
# calculate likelihood in the following regions and compare
# 1) taking pixels on the cross structure, the very preliminary step, how likely?
# 2) DEPRECATED -- taking island pixels on the cross and neighbouring pixels
# 3) taking island pixels and pixels within a radius from the CM of the island pixel
###########################
# ---- aperture fluxes on "toy" image ----
# entirely pixel-based
cropsize = int(3 * np.sqrt(beam_px))
bcg = sigma
L3 = {}
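# Toy illustration of the radial aperture mask used in the loop below
# (a 5x5 grid with radius 2 around the central pixel; values invented):
_yi, _xi = np.indices((5, 5))
_rad_demo = np.sqrt((_xi - 2) ** 2 + (_yi - 2) ** 2)
print "aperture demo, pixels inside r<=2:", (_rad_demo <= 2).sum()  # 13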
for i, (x, y) in enumerate(zip(xcoords, ycoords)):
xint = int(x)
yint = int(y)
L3_ls = []
if (x - cropsize < 0) or (y - cropsize < 0) or (x + cropsize > noisy.shape[1]) or (y + cropsize > noisy.shape[0]):
# near edge of image
pass
else:
# crop square
small = noisy[yint-cropsize:yint+cropsize+1, xint-cropsize:xint+cropsize+1].copy().astype(np.float64) # - bcg # don't have to subtract background since we are taking covariance matrix in likelihood calculation, and we are not doing aperture photometry (i.e.getting instrinsic flux)
# all indeces of the small gridded image
yind, xind = np.indices(small.shape)
# assume that centre is the same as the peak pixel (zero indexed)
# recenter
inner_quarter_x = slice(int(np.floor(xind.max() / 4.)), int(np.ceil(xind.max() / 4. * 3)))
inner_quarter_y = slice(int(np.floor(yind.max() / 4.)), int(np.ceil(yind.max() / 4. * 3)))
_small = np.zeros((small.shape[0], small.shape[1]))
_small[inner_quarter_y, inner_quarter_x] = small[inner_quarter_y, inner_quarter_x]
ycen, xcen = ndimage.measurements.maximum_position(_small)
# change the peak to be 0, 0 and calculate radius
xind -= xcen
yind -= ycen
rad = np.sqrt(xind**2 + yind**2) # approach 3
plt.figure(figsize=(20, 6))
plt.subplot(131)
plt.imshow(_small)
plt.subplot(132)
plt.imshow(small)
# plt.scatter(xind, yind, color='black', alpha=0.02)
# plt.scatter(xind+xcen, yind+ycen, color='red', alpha=0.02)
plt.scatter(xcen, ycen, color='blue')
# plt.show()
# # calculate flux in the apertures L3, radius
L3_size = range(int(np.ceil(size_min)), int(np.ceil(1.2*beam_px)))
for lsize in L3_size:
model3_ls = []
mask = rad <= lsize
cts_ls = list(small[np.where(mask)])
# counts = small[np.where(mask)].sum()
idx = np.where(mask) # tuple
for px_XYpair in zip(idx[0], idx[1]):
h = px_XYpair[1]
v = px_XYpair[0]
fluxThisPx = small[v, h]
model3_ls.append(model1(fluxThisPx, v, h, ycen, xcen, lsize))
L3_ls.append(likelihood(model3_ls, cts_ls, sigma))
L3[str(i)] = L3_ls
plt.subplot(133)
plt.plot(L3_size, L3_ls)
plt.plot(np.sqrt(sizes1[i]/np.pi), L1[i], "r*")
plt.yticks([])
plt.ylabel(r'$\rm ln\ \cal L$')  # maximizing ln(L) = maximizing L
plt.xlabel('Radial extent [px]')
# save = raw_input('save figure? y/n')
if save == 'y':
plt.savefig(place+name+str(i)+'L3.eps', dvi=600)
# plt.show()
# Likelihood not as distinct if SNR is low (~<10 )
# significantly better if no source is around, and if SNR is high
@astro313 (Author):
Not a concern anymore...

beam_major_arcsec = 5.0    # arcsec
beam_minor_arcsec = 5.0
beam_major_arcsec = 0.05
beam_minor_arcsec = 0.05

R spans 1 - 7 px. beam size should be more sensible.

@astro313 (Author):
Not a concern anymore...

---- Clear out extremely small sizes and large sizes ------
" Based on the fact that the sources I am looking for should smear out to larger than 1 pixel for above 3 sigma"

some random fixing for now, need to fix later!

size_min = 1.25     # now a "random" pick
# size_min = beam_major_arcsec if beam_major_arcsec < beam_minor_arcsec else beam_minor_arcsec      # if convolving with beam
size_max = 100      # now a "random" pick...

@astro313 (Author) commented May 9, 2015:

template: Gaussian denominator fixed, 2R -> 2R^2
