Skip to content

Commit 6af4e46

Browse files
Jorge Fernandez Hernandez
authored and committed
EUCLIDMNGT-1447 retrieve multiple spectra
1 parent 234c624 commit 6af4e46

File tree

5 files changed

+189
-47
lines changed

5 files changed

+189
-47
lines changed

CHANGES.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ esa.euclid
3737
The method now only supports retrieval of MER (background‑subtracted) image cutouts. [#3559]
3838
- The ``get_product_list`` method now also returns file_name_list column when the product type belongs to
3939
BASIC_DOWNLOAD_DATA_PRODUCTS. [#3562]
40+
- The ``get_spectrum`` method now accepts a single source_id or designation, a string of several such values
41+
separated by commas, or a list of them. [#3570]
4042

4143

4244
vizier

astroquery/esa/euclid/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ class Conf(_config.ConfigNamespace):
1818
ROW_LIMIT = _config.ConfigItem(50,
1919
"Number of rows to return from database query (set to -1 for unlimited).")
2020

21+
SPECTRA_LIMIT = _config.ConfigItem(1000, "Maximum amount of sources with spectra")
22+
2123
USE_NAMES_OVER_IDS = _config.ConfigItem(True,
2224
"When converting from an astropy.io.votable.tree.TableElement object to "
2325
"an astropy.table.Table object, you can specify whether to give "

astroquery/esa/euclid/core.py

Lines changed: 128 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,19 @@
99
import binascii
1010
import os
1111
import pprint
12+
import re
13+
import stat
1214
import tarfile
1315
import zipfile
1416
from collections.abc import Iterable
1517
from datetime import datetime
18+
from datetime import timezone
1619

1720
from astropy import units
1821
from astropy import units as u
1922
from astropy.coordinates import Angle
23+
from astropy.io import fits
24+
from astropy.table import Table
2025
from astropy.units import Quantity
2126
from astropy.utils import deprecated_renamed_argument
2227
from requests.exceptions import HTTPError
@@ -45,6 +50,7 @@ class EuclidClass(TapPlus):
4550

4651
__VALID_DATALINK_RETRIEVAL_TYPES = conf.VALID_DATALINK_RETRIEVAL_TYPES
4752
__VALID_LINKING_PARAMETERS = conf.VALID_LINKING_PARAMETERS
53+
__regex_designation = re.compile(r"\s*(\S+)\s(-?\d+)\s*", flags=re.MULTILINE | re.UNICODE)
4854

4955
def __init__(self, *, environment='PDR', tap_plus_conn_handler=None, datalink_handler=None, cutout_handler=None,
5056
verbose=False, show_server_messages=True):
@@ -1517,15 +1523,15 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
15171523
15181524
The spectrum associated with the source_id is downloaded as a compressed fits file, and the files it contains
15191525
are returned in a list. The compressed fits file is saved in the local path given by output_file. If this
1520-
parameter is not set, the result is saved in the file "<working
1521-
directory>/temp_<%Y%m%d_%H%M%S>/<source_id>.fits.zip". In any case, the content of the zip file is
1522-
automatically extracted.
1526+
parameter is not set, the result is saved in the file "<working directory>/temp_<%Y%m%d_%H%M%S>/
1527+
get_spectrum_output.zip". In any case, the content of the zip file is automatically extracted.
15231528
15241529
Parameters
15251530
----------
1526-
ids : str or int, mandatory
1527-
identifier for the spectrum
1528-
schema : str, mandatory, default 'sedm'
1531+
ids : str, int, list of str or list of int, mandatory
1532+
The identifier (<source_id>) or designation (<data-release> <source_id>, separated by a blank). Can be a
1533+
single identifier or designation, a string with several values separated by commas, or a list of values.
1534+
schema : str, optional, default 'sedm'
15291535
the data release
15301536
retrieval_type : str, optional, default 'ALL' to retrieve all data from the list of sources
15311537
retrieval type identifier. Possible values are: 'SPECTRA_BGS' for the blue spectrum and 'SPECTRA_RGS' for
@@ -1536,7 +1542,7 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
15361542
SOURCEPATCH_ID: the identifiers are considered as sourcepatch_id
15371543
output_file : str, optional
15381544
output file name. If no value is provided, a temporary one is created with the name
1539-
"<working directory>/temp_<%Y%m%d_%H%M%S>/<source_id>.fits"
1545+
"<working directory>/temp_<%Y%m%d_%H%M%S>/get_spectrum_output.zip"
15401546
verbose : bool, optional, default 'False'
15411547
flag to display information about the process
15421548
@@ -1548,19 +1554,41 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
15481554
15491555
"""
15501556

1551-
if ids is None or schema is None:
1557+
if ids is None:
1558+
raise ValueError(self.__ERROR_MSG_REQUESTED_GENERIC)
1559+
1560+
if isinstance(ids, (list, tuple)) and not ids:
15521561
raise ValueError(self.__ERROR_MSG_REQUESTED_GENERIC)
15531562

15541563
rt = str(retrieval_type).upper()
15551564
if rt != 'ALL' and rt not in self.__VALID_DATALINK_RETRIEVAL_TYPES:
15561565
raise ValueError(f"Invalid argument value for 'retrieval_type'. Found {retrieval_type}, "
15571566
f"expected: 'ALL' or any of {self.__VALID_DATALINK_RETRIEVAL_TYPES}")
15581567

1559-
params_dict = {}
1568+
max_allow_elements = conf.SPECTRA_LIMIT
1569+
max_elements = 1
1570+
if isinstance(ids, str):
1571+
ids_arg = ids
1572+
if ',' in ids:
1573+
max_elements = ids.count(',')
1574+
elif isinstance(ids, int):
1575+
ids_arg = str(ids)
1576+
elif isinstance(ids, (list, tuple)):
1577+
max_elements = len(ids)
1578+
ids_arg = ','.join(str(item) for item in ids)
1579+
else:
1580+
raise ValueError(self.__ERROR_MSG_REQUESTED_GENERIC)
1581+
1582+
if not self.__regex_designation.search(ids_arg) and schema is None:
1583+
raise ValueError(f"Missing data release in: ids = {ids_arg} and schema = {schema} ")
1584+
1585+
if max_elements > max_allow_elements:
1586+
raise ValueError(f"Invalid number of ids: {max_elements} > {max_allow_elements} ")
15601587

1561-
id_value = """{schema} {source_id}""".format(**{'schema': schema, 'source_id': ids})
1562-
params_dict['ID'] = id_value
1563-
params_dict['SCHEMA'] = schema
1588+
params_dict = {}
1589+
params_dict['ID'] = ids_arg
1590+
if schema is not None:
1591+
params_dict['RELEASE'] = schema
15641592
params_dict['RETRIEVAL_TYPE'] = str(retrieval_type)
15651593
params_dict['USE_ZIP_ALWAYS'] = 'true'
15661594
params_dict['TAPCLIENT'] = 'ASTROQUERY'
@@ -1573,49 +1601,108 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
15731601
if linking_parameter != 'SOURCE_ID':
15741602
params_dict['LINKING_PARAMETER'] = linking_parameter
15751603

1576-
fits_file = ids + '.fits.zip'
1577-
1578-
if output_file is not None:
1579-
if not output_file.endswith('.zip'):
1580-
output_file = output_file + '.zip'
1581-
1582-
if os.path.dirname(output_file) == '':
1583-
output_file = os.path.join(os.getcwd(), output_file)
1584-
1585-
if verbose:
1586-
print(f"Spectra output file: {output_file}")
1587-
1588-
output_file_full_path, output_dir = self.__set_dirs(output_file=output_file, observation_id=fits_file)
1604+
if output_file is None:
1605+
now = datetime.now(timezone.utc)
1606+
now_formatted = now.strftime("%Y%m%d_%H%M%S.%f")
1607+
path = os.path.join(os.getcwd(), "temp_" + now_formatted)
1608+
download_name_formatted = 'get_spectrum_output.zip'
1609+
output_file = os.path.join(path, download_name_formatted)
1610+
else:
1611+
path = os.path.dirname(output_file)
1612+
if path == '':
1613+
path = os.getcwd()
1614+
output_file = os.path.join(path, output_file)
15891615

15901616
if verbose:
1591-
print("Spectra output file: " + output_file_full_path)
1617+
print(f"Spectra output file: {output_file}")
15921618

1593-
if os.listdir(output_dir):
1594-
raise IOError(f'The directory is not empty: {output_dir}')
1595-
1596-
files = []
1619+
if not os.path.exists(path):
1620+
try:
1621+
os.mkdir(path)
1622+
except FileExistsError:
1623+
log.debug("Path %s already exist" % path)
1624+
except OSError:
1625+
log.error("Creation of the directory %s failed" % path)
15971626

15981627
try:
1599-
self.__eucliddata.load_data(params_dict=params_dict, output_file=output_file_full_path, verbose=verbose)
1628+
self.__eucliddata.load_data(params_dict=params_dict, output_file=output_file, verbose=verbose)
16001629
except HTTPError as err:
1601-
log.error(f'Cannot retrieve spectrum for source_id {ids}, schema {schema}. HTTP error: {err}')
1630+
log.error(f'Cannot retrieve spectrum for source_id {ids_arg}, schema {schema}. HTTP error: {err}')
16021631
return None
16031632
except Exception as exx:
1604-
log.error(f'Cannot retrieve spectrum for source_id {ids}, schema {schema}: {str(exx)}')
1633+
log.error(f'Cannot retrieve spectrum for source_id {ids_arg}, schema {schema}: {str(exx)}')
16051634
return None
16061635

1607-
self.__extract_file(output_file_full_path=output_file_full_path, output_dir=output_dir, files=files)
1608-
1609-
if files:
1610-
return files
1636+
try:
1637+
files = EuclidClass.__get_data_files(output_file=output_file, path=path)
1638+
except Exception as err:
1639+
raise err
16111640

1612-
self.__check_file_number(output_dir=output_dir,
1613-
output_file_name=os.path.basename(output_file_full_path),
1614-
output_file_full_path=output_file_full_path,
1615-
files=files)
1641+
if log.isEnabledFor(20):
1642+
log.debug("List of products available:")
1643+
for item in sorted([key for key in files.keys()]):
1644+
log.debug("Product = " + item)
16161645

16171646
return files
16181647

1648+
@staticmethod
1649+
def __remove_readonly(func, path, _):
1650+
"""Clear the read-only bit on ``path`` and call ``func`` on it again.

Parameters
----------
func : callable
called as ``func(path)`` once the mode of ``path`` has been changed
path : str
file system path whose mode is set to ``stat.S_IWRITE``
_ : object
ignored. NOTE(review): the (func, path, info) signature looks like a ``shutil.rmtree`` error callback -
confirm against the caller
"""
1651+
os.chmod(path, stat.S_IWRITE)
1652+
func(path)
1653+
1654+
@staticmethod
def __get_data_files(output_file, path):
    """Extract a zip archive and load the tables stored in its FITS members.

    Parameters
    ----------
    output_file : str
        path of the zip archive to read
    path : str
        directory into which the archive is extracted

    Returns
    -------
    dict
        one `~astropy.table.Table` per HDU whose ``NAXIS`` header value is not 0, keyed by
        ``"<SOURC_ID header value>_<file name>"``
    """
    # Take the location of every member from the archive itself. Walking ``path`` afterwards and matching on
    # bare file names misses members stored in sub-folders of the archive and may pick up unrelated files of
    # the same name that already live somewhere below ``path``.
    files = {}
    with zipfile.ZipFile(output_file, "r") as archive:
        for member in archive.infolist():
            if member.is_dir():
                continue
            # extract() returns the normalised path that was actually written
            files[os.path.basename(member.filename)] = archive.extract(member, path)

    result = {}
    for name, location in files.items():
        # only non-empty FITS files carry spectra
        if not name.endswith('.fits') or os.path.getsize(location) == 0:
            continue

        # if memmap = True, another handle to the FITS file is opened by mmap.
        # See https://docs.astropy.org/en/latest/io/fits/index.html
        with fits.open(location, memmap=False) as hdu_list:
            for hdu in hdu_list:
                # HDUs without data axes hold no table
                if hdu.header['NAXIS'] == 0:
                    continue
                table = Table.read(hdu, format='fits')
                EuclidClass.correct_table_units(table)
                result[str(hdu.header['SOURC_ID']) + '_' + name] = table

    return result
1684+
1685+
@staticmethod
1686+
def correct_table_units(table):
1687+
"""Clean up the unit of every column of a table
1688+
Only units that are unrecognized or plain strings are rewritten; the corrected unit is assigned to the column.
1689+
1690+
Parameters
1691+
----------
1692+
table : `~astropy.table.Table`, mandatory
1693+
table whose column units are rewritten: each '.' is replaced by ' ' and each "'" is removed
1694+
"""
1695+
1696+
for cn in table.colnames:
1697+
col = table[cn]
1698+
if isinstance(col.unit, u.UnrecognizedUnit):
1699+
# re-parse the cleaned-up unit name
try:
1700+
col.unit = u.Unit(col.unit.name.replace(".", " ").replace("'", ""))
1701+
except Exception:
1702+
# best effort: if the cleaned-up name still cannot be turned into a unit, the column keeps its unit
pass
1703+
elif isinstance(col.unit, str):
1704+
col.unit = col.unit.replace(".", " ").replace("'", "")
1705+
16191706
def get_datalinks(self, ids, *, linking_parameter='SOURCE_ID', extra_options=None, verbose=False):
16201707
"""
16211708
Gets datalinks associated to the provided identifiers (TAP+ only).
17.5 KB
Binary file not shown.

astroquery/esa/euclid/tests/test_euclidtap.py

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
PRODUCT_LIST_FILE_NAME = get_pkg_data_filename(os.path.join("data", 'test_get_product_list.vot'), package=package)
5050
TEST_GET_PRODUCT_LIST = Path(PRODUCT_LIST_FILE_NAME).read_text()
5151

52+
MULTPLE_GET_SPECTRUM = get_pkg_data_filename(os.path.join("data", 'get_spectrum_output.zip'), package=package)
53+
5254

5355
def make_table_metadata(table_name, ra, dec):
5456
tap_table = TapTableMeta()
@@ -1276,7 +1278,16 @@ def test_get_cutout_exceptions_2(mock_load_data, caplog):
12761278
assert caplog.records[1].msg == mssg
12771279

12781280

1279-
def test_get_spectrum(tmp_path_factory, capsys):
1281+
@pytest.mark.filterwarnings('ignore:')
1282+
@patch.object(TapPlus, 'load_data')
1283+
def test_get_spectrum(mock_load_data, tmp_path_factory, capsys):
1284+
def _fake_load_data(*args, **kwargs):
1285+
output_file = kwargs.get("output_file")
1286+
shutil.copy2(MULTPLE_GET_SPECTRUM, Path(output_file))
1287+
return None
1288+
1289+
mock_load_data.side_effect = _fake_load_data
1290+
12801291
conn_handler = DummyConnHandler()
12811292
tap_plus = TapPlus(url="http://test:1111/tap", data_context='data', client_id='ASTROQUERY',
12821293
connhandler=conn_handler)
@@ -1300,10 +1311,10 @@ def test_get_spectrum(tmp_path_factory, capsys):
13001311

13011312
remove_temp_dir()
13021313

1303-
fits_file = os.path.join(tmp_path_factory.mktemp("euclid_tmp"), 'my_fits_file.fits')
1314+
fits_file = os.path.join(tmp_path_factory.mktemp("euclid_tmp"), 'my_fits_file.zip')
13041315

13051316
result = tap.get_spectrum(ids='2417660845403252054', schema='sedm_sc8', output_file=fits_file)
1306-
assert os.path.exists(fits_file + '.zip')
1317+
assert os.path.exists(fits_file)
13071318

13081319
assert result is not None
13091320

@@ -1315,8 +1326,9 @@ def test_get_spectrum(tmp_path_factory, capsys):
13151326

13161327
captured = capsys.readouterr()
13171328

1318-
file_path = captured.out.splitlines()[0].replace('Spectra output file: ', '')
1329+
file_path = captured.out.splitlines()[1].replace('Spectra output file: ', '')
13191330
assert os.path.exists(file_path)
1331+
assert os.path.basename(file_path) == 'get_spectrum_output.zip'
13201332

13211333
remove_temp_dir()
13221334

@@ -1329,7 +1341,7 @@ def test_get_spectrum(tmp_path_factory, capsys):
13291341

13301342
remove_temp_dir()
13311343

1332-
fits_file = os.path.join(tmp_path_factory.mktemp("euclid_tmp"), 'my_fits_file.fits')
1344+
fits_file = os.path.join(tmp_path_factory.mktemp("euclid_tmp"), 'my_fits_file.zip')
13331345

13341346
result = tap.get_spectrum(ids='1499442653027920313123456789', schema='sedm_sc8', linking_parameter="SOURCEPATCH_ID",
13351347
output_file=fits_file)
@@ -1338,6 +1350,45 @@ def test_get_spectrum(tmp_path_factory, capsys):
13381350

13391351
remove_temp_dir()
13401352

1353+
# Multiple files
1354+
1355+
result = tap.get_spectrum(ids=['1499442653027920313', '1500431128027836270'], schema='sedm_sc8', output_file=None,
1356+
verbose=True)
1357+
1358+
assert result is not None
1359+
assert len(result) == 2
1360+
1361+
captured = capsys.readouterr()
1362+
1363+
file_path = captured.out.splitlines()[1].replace('Spectra output file: ', '')
1364+
assert os.path.exists(file_path)
1365+
assert os.path.basename(file_path) == 'get_spectrum_output.zip'
1366+
1367+
remove_temp_dir()
1368+
1369+
result = tap.get_spectrum(ids='1499442653027920313,1500431128027836270', schema='sedm_sc8', output_file=None,
1370+
verbose=True)
1371+
1372+
assert result is not None
1373+
assert len(result) == 2
1374+
1375+
captured = capsys.readouterr()
1376+
1377+
file_path = captured.out.splitlines()[1].replace('Spectra output file: ', '')
1378+
assert os.path.exists(file_path)
1379+
assert os.path.basename(file_path) == 'get_spectrum_output.zip'
1380+
1381+
remove_temp_dir()
1382+
1383+
ids = ['sedm 1499442653027920313'] * 2000
1384+
1385+
with pytest.raises(ValueError, match="Invalid number of ids: 2000 > 1000"):
1386+
tap.get_spectrum(ids=ids, schema='sedm_sc8', output_file=None, verbose=True)
1387+
1388+
message = "Missing data release in: ids = 1499442653027920313,1500431128027836270 and schema = None"
1389+
with pytest.raises(ValueError, match=message):
1390+
tap.get_spectrum(ids='1499442653027920313,1500431128027836270', schema=None, output_file=None, verbose=True)
1391+
13411392

13421393
@patch.object(TapPlus, 'load_data')
13431394
def test_get_spectrum_exceptions_2(mock_load_data, caplog):
@@ -1385,7 +1436,7 @@ def test_get_spectrum_exceptions():
13851436
with pytest.raises(ValueError, match="Missing required argument"):
13861437
tap.get_spectrum(ids=None, schema='sedm_sc8', output_file=None)
13871438

1388-
with pytest.raises(ValueError, match="Missing required argument"):
1439+
with pytest.raises(ValueError, match="Missing data release in: ids = 2417660845403252054 and schema = None "):
13891440
tap.get_spectrum(ids='2417660845403252054', schema=None, output_file=None)
13901441

13911442
with pytest.raises(ValueError, match=(

0 commit comments

Comments
 (0)