Source code for astroML.datasets.nasa_atlas
"""
NASA Sloan Atlas dataset size reduction
---------------------------------------
The NASA Sloan Atlas dataset is contained in a ~0.5GB available at
http://www.nsatlas.org/data
This function fetches a ~50MB subset of that data. This subset is created
using the code that can be found at examples/datasets/truncate_nsa_data.py
"""
import os
import numpy as np
from .tools import download_with_progress_bar
from . import get_data_home
DATA_URL = ('https://github.com/astroML/astroML-data/raw/master/datasets/'
            'nsa_v0_1_2_reduced.npy')
ARCHIVE_FILE = os.path.basename(DATA_URL)


def fetch_nasa_atlas(data_home=None,
                     download_if_missing=True):
"""Loader for NASA galaxy atlas data
Parameters
----------
data_home : optional, default=None
Specify another download and cache folder for the datasets. By default
all astroML data is stored in '~/astroML_data'.
download_if_missing : optional, default=True
If False, raise a IOError if the data is not locally available
instead of trying to download the data from the source site.
Returns
-------
data : ndarray
The data, in the form of a numpy record array.
Notes
-----
This is the file created by the example script at
examples/datasets/truncate_nsa_data.py
For an explanation of the meaning of the fields, see the description at
http://www.nsatlas.org/data
"""
    data_home = get_data_home(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading NASA atlas data from %s to %s"
              % (DATA_URL, data_home))

        buf = download_with_progress_bar(DATA_URL, return_buffer=True)
        data = np.load(buf)

        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
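

# A minimal usage sketch (not part of the original module). It assumes astroML
# is installed so that the public loader can be imported from astroML.datasets;
# run it, for example, with ``python -m astroML.datasets.nasa_atlas`` or copy
# it into a separate script.
if __name__ == "__main__":
    from astroML.datasets import fetch_nasa_atlas

    # The first call downloads the ~50MB reduced atlas into data_home
    # ('~/astroML_data' by default); later calls load the cached .npy file.
    data = fetch_nasa_atlas()

    # The result is a numpy record array: one row per galaxy, with named
    # fields documented at http://www.nsatlas.org/data
    print("number of objects:", len(data))
    print("available fields:", data.dtype.names)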