kchawla-pi/post-refactor.py

## post-refactor.py

def _download_spm_auditory_data(data_dir, subject_dir, subject_id):
    """Download the SPM auditory archive and uncompress it.

    Parameters
    ----------
    data_dir: string
        Dataset directory; only used to call ``fetch_spm_auditory`` again
        when the archive cannot be uncompressed.

    subject_dir: string
        Directory handed to ``_fetch_file`` together with the archive URL;
        the archive is then expected at ``subject_dir/MoAEpilot.zip``.

    subject_id: string
        Subject identifier; only forwarded to ``fetch_spm_auditory`` on
        retry.

    Returns
    -------
    None if the archive was uncompressed without error, otherwise the
    return value of the retried ``fetch_spm_auditory`` call.

    """
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt / SystemExit abort the download instead of
        # being reported as a corrupted archive and triggering a retry.
        print("Archive corrupted, trying to download it again.")
        # NOTE(review): data_name="" presumably keeps the dataset directory
        # equal to ``data_dir`` -- confirm against ``_get_dataset_dir``.
        return fetch_spm_auditory(data_dir=data_dir, data_name="",
                                  subject_id=subject_id)
    return None


def _prepare_downloaded_spm_auditory_data(subject_dir):
    """Collect the spm_auditory files of one subject and fix 4D volumes.

    Every file listed in ``SPM_AUDITORY_DATA_FILES`` must exist below
    ``subject_dir``. The functional and anatomical images are then selected
    by file name, and any of them stored as a 4D volume is overwritten on
    disk with its first 3D volume.

    Parameters
    ----------
    subject_dir: string
        Path to subject's data directory.

    Returns
    -------
    _subject_data: Bunch or None
        Bunch with the keys 'func' (sorted list of paths to the functional
        images) and 'anat' (path to the anatomical image).
        None if a file listed in ``SPM_AUDITORY_DATA_FILES`` is missing.

    """
    subject_data = {}
    for file_name in SPM_AUDITORY_DATA_FILES:
        file_path = os.path.join(subject_dir, file_name)
        if os.path.exists(file_path):
            subject_data[file_name] = file_path
        else:
            print("%s missing from filelist!" % file_name)
            return None

    _subject_data = {}
    # Raw strings: "\d" and "\." are not valid escapes in a plain literal.
    _subject_data["func"] = sorted(
        file_path for file_name, file_path in subject_data.items()
        if re.match(r"^fM00223_0\d\d\.img$", os.path.basename(file_name)))

    # volumes for this dataset of shape (64, 64, 64, 1); let's fix this
    for func_path in _subject_data["func"]:
        _squeeze_spm_auditory_volume(func_path)

    # Raises IndexError if no file name matches the anatomical pattern.
    _subject_data["anat"] = [
        file_path for file_name, file_path in subject_data.items()
        if re.match(r"^sM00223_002\.img$", os.path.basename(file_name))][0]

    # ... same thing for anat
    _squeeze_spm_auditory_volume(_subject_data["anat"])

    return Bunch(**_subject_data)


def _squeeze_spm_auditory_volume(img_path):
    """Overwrite the image at ``img_path`` with its first volume if it is 4D.

    Images whose shape does not have exactly 4 dimensions are left untouched.
    """
    vol = nib.load(img_path)
    if len(vol.shape) == 4:
        # np.asanyarray(img.dataobj) is nibabel's documented replacement
        # for the deprecated img.get_data().
        vol = nib.Nifti1Image(np.asanyarray(vol.dataobj)[:, :, :, 0],
                              vol.affine)
        nib.save(vol, img_path)


def _make_path_events_file_spm_auditory_data(spm_auditory_data):
    """
    Builds the path of the events descriptor file of the spm_auditory
    dataset: the file sits in the directory of the first functional image
    and is named ``<name of that directory>_events.tsv``.

    Parameters
    ----------
    spm_auditory_data: Bunch
        Dataset description; only the first entry of its 'func' list is read.

    Returns
    -------
    events_filepath: string
        Full path to the events.tsv file for spm_auditory dataset.
    """
    first_func_image = spm_auditory_data['func'][0]
    func_dir, _ = os.path.split(first_func_image)
    _, func_dir_name = os.path.split(func_dir)
    return os.path.join(func_dir, func_dir_name + '_events.tsv')


def _make_events_file_spm_auditory_data(events_filepath):
    """
    Writes the events.tsv file of the spm_auditory dataset.

    The table has 16 rows alternating 'rest' and 'active' (starting with
    'rest'); each row lasts 6 * 7 = 42 seconds and the onsets are spaced
    42 seconds apart, starting at 0. Columns are written tab-separated in
    the order onset, duration, trial_type, without the index.

    Parameters
    ----------
    events_filepath: string
        Destination path, file name included, of the events file.

    Returns
    -------
    None

    """
    column_order = ['onset', 'duration', 'trial_type']
    tr = 7.  # presumably the repetition time in seconds -- TODO confirm
    block_length = 6 * tr  # duration in seconds
    trial_types = ['rest', 'active'] * 8
    block_count = len(trial_types)
    last_onset = (block_count - 1) * block_length
    table = pd.DataFrame({
        'onset': np.linspace(0, last_onset, block_count),
        'duration': block_length * np.ones(block_count),
        'trial_type': trial_types,
    })
    table.to_csv(events_filepath, sep='\t', index=False,
                 columns=column_order)


def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id="sub001", verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a specified
        location. If the data is already present there, then will simply
        glob it.

    data_name: string
        Name of the dataset, forwarded to ``_get_dataset_dir``.

    subject_id: string
        Name of the subject's directory inside the dataset directory. The
        data is downloaded only if this directory does not exist yet.

    verbose: int
        Verbosity level, forwarded to ``_get_dataset_dir``.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string. Path to anat image
        - 'events': string. Path to the events.tsv file, which is created
          if it does not exist yet

    Raises
    ------
    OSError
        If the dataset files could not be collected from the subject's
        directory.

    References
    ----------
    :download:
        http://www.fil.ion.ucl.ac.uk/spm/data/auditory/
    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)
    if not os.path.exists(subject_dir):
        _download_spm_auditory_data(data_dir, subject_dir, subject_id)
    spm_auditory_data = _prepare_downloaded_spm_auditory_data(subject_dir)
    if spm_auditory_data is None:
        # The helper returns None when an expected file is missing; fail
        # with a clear message instead of a TypeError on None['events'].
        raise OSError("SPM auditory data is incomplete in %s" % subject_dir)
    try:
        spm_auditory_data['events']
    except KeyError:
        events_filepath = _make_path_events_file_spm_auditory_data(
            spm_auditory_data)
        if not os.path.isfile(events_filepath):
            _make_events_file_spm_auditory_data(events_filepath)
        spm_auditory_data['events'] = events_filepath
    return spm_auditory_data

	def _download_spm_auditory_data(data_dir, subject_dir, subject_id):
	    """Download the SPM auditory archive and uncompress it.

	    Parameters
	    ----------
	    data_dir: string
	        Dataset directory; only used to call ``fetch_spm_auditory`` again
	        when the archive cannot be uncompressed.

	    subject_dir: string
	        Directory handed to ``_fetch_file`` together with the archive URL;
	        the archive is then expected at ``subject_dir/MoAEpilot.zip``.

	    subject_id: string
	        Subject identifier; only forwarded to ``fetch_spm_auditory`` on
	        retry.

	    Returns
	    -------
	    None if the archive was uncompressed without error, otherwise the
	    return value of the retried ``fetch_spm_auditory`` call.

	    """
	    print("Data absent, downloading...")
	    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
	           "MoAEpilot.zip")
	    archive_path = os.path.join(subject_dir, os.path.basename(url))
	    _fetch_file(url, subject_dir)
	    try:
	        _uncompress_file(archive_path)
	    except Exception:
	        # ``Exception`` instead of a bare ``except`` so that
	        # KeyboardInterrupt / SystemExit abort the download instead of
	        # being reported as a corrupted archive and triggering a retry.
	        print("Archive corrupted, trying to download it again.")
	        # NOTE(review): data_name="" presumably keeps the dataset
	        # directory equal to ``data_dir`` -- confirm against
	        # ``_get_dataset_dir``.
	        return fetch_spm_auditory(data_dir=data_dir, data_name="",
	                                  subject_id=subject_id)
	    return None


	def _prepare_downloaded_spm_auditory_data(subject_dir):
	    """Collect the spm_auditory files of one subject and fix 4D volumes.

	    Every file listed in ``SPM_AUDITORY_DATA_FILES`` must exist below
	    ``subject_dir``. The functional and anatomical images are then
	    selected by file name, and any of them stored as a 4D volume is
	    overwritten on disk with its first 3D volume.

	    Parameters
	    ----------
	    subject_dir: string
	        Path to subject's data directory.

	    Returns
	    -------
	    _subject_data: Bunch or None
	        Bunch with the keys 'func' (sorted list of paths to the
	        functional images) and 'anat' (path to the anatomical image).
	        None if a file listed in ``SPM_AUDITORY_DATA_FILES`` is missing.

	    """
	    subject_data = {}
	    for file_name in SPM_AUDITORY_DATA_FILES:
	        file_path = os.path.join(subject_dir, file_name)
	        if os.path.exists(file_path):
	            subject_data[file_name] = file_path
	        else:
	            print("%s missing from filelist!" % file_name)
	            return None

	    _subject_data = {}
	    # Raw strings: "\d" and "\." are not valid escapes in a plain literal.
	    _subject_data["func"] = sorted(
	        file_path for file_name, file_path in subject_data.items()
	        if re.match(r"^fM00223_0\d\d\.img$", os.path.basename(file_name)))

	    # volumes for this dataset of shape (64, 64, 64, 1); let's fix this
	    for func_path in _subject_data["func"]:
	        _squeeze_spm_auditory_volume(func_path)

	    # Raises IndexError if no file name matches the anatomical pattern.
	    _subject_data["anat"] = [
	        file_path for file_name, file_path in subject_data.items()
	        if re.match(r"^sM00223_002\.img$", os.path.basename(file_name))][0]

	    # ... same thing for anat
	    _squeeze_spm_auditory_volume(_subject_data["anat"])

	    return Bunch(**_subject_data)


	def _squeeze_spm_auditory_volume(img_path):
	    """Overwrite the image at ``img_path`` with its first volume if it is 4D.

	    Images whose shape does not have exactly 4 dimensions are left
	    untouched.
	    """
	    vol = nib.load(img_path)
	    if len(vol.shape) == 4:
	        # np.asanyarray(img.dataobj) is nibabel's documented replacement
	        # for the deprecated img.get_data().
	        vol = nib.Nifti1Image(np.asanyarray(vol.dataobj)[:, :, :, 0],
	                              vol.affine)
	        nib.save(vol, img_path)


	def _make_path_events_file_spm_auditory_data(spm_auditory_data):
	    """
	    Accepts data for spm_auditory dataset as Bunch
	    and constructs the filepath for its events descriptor file.

	    The file sits in the directory of the first functional image and is
	    named ``<name of that directory>_events.tsv``.

	    Parameters
	    ----------
	    spm_auditory_data: Bunch
	        Dataset description; only the first entry of its 'func' list is
	        read.

	    Returns
	    -------
	    events_filepath: string
	        Full path to the events.tsv file for spm_auditory dataset.
	    """
	    events_file_location = os.path.dirname(spm_auditory_data['func'][0])
	    events_filename = (os.path.basename(events_file_location)
	                       + '_events.tsv')
	    events_filepath = os.path.join(events_file_location, events_filename)
	    return events_filepath


	def _make_events_file_spm_auditory_data(events_filepath):
	    """
	    Accepts destination filepath including filename and
	    creates the events.tsv file for the spm_auditory dataset.

	    The table has 16 rows alternating 'rest' and 'active' (starting with
	    'rest'); each row lasts 6 * 7 = 42 seconds and the onsets are spaced
	    42 seconds apart, starting at 0. Columns are written tab-separated in
	    the order onset, duration, trial_type, without the index.

	    Parameters
	    ----------
	    events_filepath: string
	        The path where the events file will be created;

	    Returns
	    -------
	    None

	    """
	    tr = 7.  # presumably the repetition time in seconds -- TODO confirm
	    epoch_duration = 6 * tr  # duration in seconds
	    conditions = ['rest', 'active'] * 8
	    n_blocks = len(conditions)
	    duration = epoch_duration * np.ones(n_blocks)
	    onset = np.linspace(0, (n_blocks - 1) * epoch_duration, n_blocks)
	    events = pd.DataFrame(
	        {'onset': onset, 'duration': duration, 'trial_type': conditions})
	    events.to_csv(events_filepath, sep='\t', index=False,
	                  columns=['onset', 'duration', 'trial_type'])


	def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
	                       subject_id="sub001", verbose=1):
	    """Function to fetch SPM auditory single-subject data.

	    Parameters
	    ----------
	    data_dir: string
	        Path of the data directory. Used to force data storage in a
	        specified location. If the data is already present there, then
	        will simply glob it.

	    data_name: string
	        Name of the dataset, forwarded to ``_get_dataset_dir``.

	    subject_id: string
	        Name of the subject's directory inside the dataset directory. The
	        data is downloaded only if this directory does not exist yet.

	    verbose: int
	        Verbosity level, forwarded to ``_get_dataset_dir``.

	    Returns
	    -------
	    data: sklearn.datasets.base.Bunch
	        Dictionary-like object, the interest attributes are:
	        - 'func': string list. Paths to functional images
	        - 'anat': string. Path to anat image
	        - 'events': string. Path to the events.tsv file, which is created
	          if it does not exist yet

	    Raises
	    ------
	    OSError
	        If the dataset files could not be collected from the subject's
	        directory.

	    References
	    ----------
	    :download:
	        http://www.fil.ion.ucl.ac.uk/spm/data/auditory/
	    """
	    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
	                                verbose=verbose)
	    subject_dir = os.path.join(data_dir, subject_id)
	    if not os.path.exists(subject_dir):
	        _download_spm_auditory_data(data_dir, subject_dir, subject_id)
	    spm_auditory_data = _prepare_downloaded_spm_auditory_data(subject_dir)
	    if spm_auditory_data is None:
	        # The helper returns None when an expected file is missing; fail
	        # with a clear message instead of a TypeError on None['events'].
	        raise OSError("SPM auditory data is incomplete in %s"
	                      % subject_dir)
	    try:
	        spm_auditory_data['events']
	    except KeyError:
	        events_filepath = _make_path_events_file_spm_auditory_data(
	            spm_auditory_data)
	        if not os.path.isfile(events_filepath):
	            _make_events_file_spm_auditory_data(events_filepath)
	        spm_auditory_data['events'] = events_filepath
	    return spm_auditory_data