EUCLIDMNGT-1447 retrieve multiple spectra

Jorge Fernandez Hernandez · Jorge Fernandez Hernandez · commit 6af4e468d660 · 2026-03-26T21:28:22.000+01:00
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -37,6 +37,8 @@ esa.euclid
   The method now only supports retrieval of MER (background‑subtracted) image cutouts. [#3559]
 - The ``get_product_list`` method now also returns file_name_list column when the product type belongs to 
   BASIC_DOWNLOAD_DATA_PRODUCTS. [#3562]
+- The method ``get_spectrum`` accepts a single source_id or designation or multiple values separated by commas or a
+   list. [#3570]
 
 
 vizier
diff --git a/astroquery/esa/euclid/__init__.py b/astroquery/esa/euclid/__init__.py
@@ -18,6 +18,8 @@ class Conf(_config.ConfigNamespace):
     ROW_LIMIT = _config.ConfigItem(50,
                                    "Number of rows to return from database query (set to -1 for unlimited).")
 
+    SPECTRA_LIMIT = _config.ConfigItem(1000, "Maximum amount of sources with spectra")
+
     USE_NAMES_OVER_IDS = _config.ConfigItem(True,
                                             "When converting from an astropy.io.votable.tree.TableElement object to "
                                             "an astropy.table.Table object, you can specify whether to give "
diff --git a/astroquery/esa/euclid/core.py b/astroquery/esa/euclid/core.py
@@ -9,14 +9,19 @@
 import binascii
 import os
 import pprint
+import re
+import stat
 import tarfile
 import zipfile
 from collections.abc import Iterable
 from datetime import datetime
+from datetime import timezone
 
 from astropy import units
 from astropy import units as u
 from astropy.coordinates import Angle
+from astropy.io import fits
+from astropy.table import Table
 from astropy.units import Quantity
 from astropy.utils import deprecated_renamed_argument
 from requests.exceptions import HTTPError
@@ -45,6 +50,7 @@ class EuclidClass(TapPlus):
 
     __VALID_DATALINK_RETRIEVAL_TYPES = conf.VALID_DATALINK_RETRIEVAL_TYPES
     __VALID_LINKING_PARAMETERS = conf.VALID_LINKING_PARAMETERS
+    __regex_designation = re.compile(r"\s*(\S+)\s(-?\d+)\s*", flags=re.MULTILINE | re.UNICODE)
 
     def __init__(self, *, environment='PDR', tap_plus_conn_handler=None, datalink_handler=None, cutout_handler=None,
                  verbose=False, show_server_messages=True):
@@ -1517,15 +1523,15 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
 
         The spectrum associated with the source_id is downloaded as a compressed fits file, and the files it contains
         are returned in a list. The compressed fits file is saved in the local path given by output_file. If this
-        parameter is not set, the result is saved in the file "<working
-        directory>/temp_<%Y%m%d_%H%M%S>/<source_id>.fits.zip". In any case, the content of the zip file is
-        automatically extracted.
+        parameter is not set, the result is saved in the file "<working directory>/temp_<%Y%m%d_%H%M%S>/
+        get_spectrum_output.zip". In any case, the content of the zip file is automatically extracted.
 
         Parameters
         ----------
-        ids : str or int, mandatory
-            identifier for the spectrum
-        schema : str, mandatory, default 'sedm'
+        ids : str, int, str list or int list, mandatory
+            The identifier (<source_id>) or designation (<data-release>+blank+<source_id>). Can be a single designation
+            or id, a string with multiple values separated by commas, or a list.
+        schema : str, optional, default 'sedm'
             the data release
         retrieval_type : str, optional, default 'ALL' to retrieve all data from the list of sources
             retrieval type identifier. Possible values are: 'SPECTRA_BGS' for the blue spectrum and 'SPECTRA_RGS' for
@@ -1536,7 +1542,7 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
             SOURCEPATCH_ID: the identifiers are considered as sourcepatch_id
         output_file : str, optional
             output file name. If no value is provided, a temporary one is created with the name
-            "<working directory>/temp_<%Y%m%d_%H%M%S>/<source_id>.fits"
+            "<working directory>/temp_<%Y%m%d_%H%M%S>/get_spectrum_output.zip"
         verbose : bool, optional, default 'False'
             flag to display information about the process
 
@@ -1548,19 +1554,41 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
 
         """
 
-        if ids is None or schema is None:
+        if ids is None:
+            raise ValueError(self.__ERROR_MSG_REQUESTED_GENERIC)
+
+        if isinstance(ids, (list, tuple)) and not ids:
             raise ValueError(self.__ERROR_MSG_REQUESTED_GENERIC)
 
         rt = str(retrieval_type).upper()
         if rt != 'ALL' and rt not in self.__VALID_DATALINK_RETRIEVAL_TYPES:
             raise ValueError(f"Invalid argument value for 'retrieval_type'. Found {retrieval_type}, "
                              f"expected: 'ALL' or any of {self.__VALID_DATALINK_RETRIEVAL_TYPES}")
 
-        params_dict = {}
+        max_allow_elements = conf.SPECTRA_LIMIT
+        max_elements = 1
+        if isinstance(ids, str):
+            ids_arg = ids
+            if ',' in ids:
+                max_elements = ids.count(',')
+        elif isinstance(ids, int):
+            ids_arg = str(ids)
+        elif isinstance(ids, (list, tuple)):
+            max_elements = len(ids)
+            ids_arg = ','.join(str(item) for item in ids)
+        else:
+            raise ValueError(self.__ERROR_MSG_REQUESTED_GENERIC)
+
+        if not self.__regex_designation.search(ids_arg) and schema is None:
+            raise ValueError(f"Missing data release in: ids = {ids_arg} and schema = {schema} ")
+
+        if max_elements > max_allow_elements:
+            raise ValueError(f"Invalid number of ids:  {max_elements} > {max_allow_elements} ")
 
-        id_value = """{schema} {source_id}""".format(**{'schema': schema, 'source_id': ids})
-        params_dict['ID'] = id_value
-        params_dict['SCHEMA'] = schema
+        params_dict = {}
+        params_dict['ID'] = ids_arg
+        if schema is not None:
+            params_dict['RELEASE'] = schema
         params_dict['RETRIEVAL_TYPE'] = str(retrieval_type)
         params_dict['USE_ZIP_ALWAYS'] = 'true'
         params_dict['TAPCLIENT'] = 'ASTROQUERY'
@@ -1573,49 +1601,108 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
             if linking_parameter != 'SOURCE_ID':
                 params_dict['LINKING_PARAMETER'] = linking_parameter
 
-        fits_file = ids + '.fits.zip'
-
-        if output_file is not None:
-            if not output_file.endswith('.zip'):
-                output_file = output_file + '.zip'
-
-            if os.path.dirname(output_file) == '':
-                output_file = os.path.join(os.getcwd(), output_file)
-
-            if verbose:
-                print(f"Spectra output file: {output_file}")
-
-        output_file_full_path, output_dir = self.__set_dirs(output_file=output_file, observation_id=fits_file)
+        if output_file is None:
+            now = datetime.now(timezone.utc)
+            now_formatted = now.strftime("%Y%m%d_%H%M%S.%f")
+            path = os.path.join(os.getcwd(), "temp_" + now_formatted)
+            download_name_formatted = 'get_spectrum_output.zip'
+            output_file = os.path.join(path, download_name_formatted)
+        else:
+            path = os.path.dirname(output_file)
+            if path == '':
+                path = os.getcwd()
+                output_file = os.path.join(path, output_file)
 
         if verbose:
-            print("Spectra output file: " + output_file_full_path)
+            print(f"Spectra output file: {output_file}")
 
-        if os.listdir(output_dir):
-            raise IOError(f'The directory is not empty: {output_dir}')
-
-        files = []
+        if not os.path.exists(path):
+            try:
+                os.mkdir(path)
+            except FileExistsError:
+                log.debug("Path %s already exist" % path)
+            except OSError:
+                log.error("Creation of the directory %s failed" % path)
 
         try:
-            self.__eucliddata.load_data(params_dict=params_dict, output_file=output_file_full_path, verbose=verbose)
+            self.__eucliddata.load_data(params_dict=params_dict, output_file=output_file, verbose=verbose)
         except HTTPError as err:
-            log.error(f'Cannot retrieve spectrum for source_id {ids}, schema {schema}. HTTP error: {err}')
+            log.error(f'Cannot retrieve spectrum for source_id {ids_arg}, schema {schema}. HTTP error: {err}')
             return None
         except Exception as exx:
-            log.error(f'Cannot retrieve spectrum for source_id {ids}, schema {schema}: {str(exx)}')
+            log.error(f'Cannot retrieve spectrum for source_id {ids_arg}, schema {schema}: {str(exx)}')
             return None
 
-        self.__extract_file(output_file_full_path=output_file_full_path, output_dir=output_dir, files=files)
-
-        if files:
-            return files
+        try:
+            files = EuclidClass.__get_data_files(output_file=output_file, path=path)
+        except Exception as err:
+            raise err
 
-        self.__check_file_number(output_dir=output_dir,
-                                 output_file_name=os.path.basename(output_file_full_path),
-                                 output_file_full_path=output_file_full_path,
-                                 files=files)
+        if log.isEnabledFor(20):
+            log.debug("List of products available:")
+            for item in sorted([key for key in files.keys()]):
+                log.debug("Product = " + item)
 
         return files
 
+    @staticmethod
+    def __remove_readonly(func, path, _):
+        "Clear the readonly bit and reattempt the removal"
+        os.chmod(path, stat.S_IWRITE)
+        func(path)
+
+    @staticmethod
+    def __get_data_files(output_file, path):
+        files = {}
+        extracted_files = []
+
+        with zipfile.ZipFile(output_file, "r") as zObject:
+            extracted_files.extend(zObject.namelist())
+            zObject.extractall(path)
+
+        # r=root, d=directories, f = files
+        for r, d, f in os.walk(path):
+            for file in f:
+                if file in extracted_files:
+                    files[file] = os.path.join(r, file)
+
+        result = dict()
+        for key, value in files.items():
+            if key.endswith('.fits') and os.path.getsize(value) > 0:
+
+                # if memmap = True, another handle to the FITS file is opened by mmap.
+                # See https://docs.astropy.org/en/latest/io/fits/index.html
+                with fits.open(value, memmap=False) as hduList:
+                    for hdu in hduList:
+                        if hdu.header['NAXIS'] == 0:
+                            continue
+                        table = Table.read(hdu, format='fits')
+                        EuclidClass.correct_table_units(table)
+                        result[str(hdu.header['SOURC_ID']) + '_' + key] = table
+
+        return result
+
+    @staticmethod
+    def correct_table_units(table):
+        """Correct format in the units of the columns
+        TAP & TAP+
+
+        Parameters
+        ----------
+        table : `~astropy.table.Table`, mandatory
+            change the format of the units in the columns of the input table: '.' by ' ' and "'" by ""
+        """
+
+        for cn in table.colnames:
+            col = table[cn]
+            if isinstance(col.unit, u.UnrecognizedUnit):
+                try:
+                    col.unit = u.Unit(col.unit.name.replace(".", " ").replace("'", ""))
+                except Exception:
+                    pass
+            elif isinstance(col.unit, str):
+                col.unit = col.unit.replace(".", " ").replace("'", "")
+
     def get_datalinks(self, ids, *, linking_parameter='SOURCE_ID', extra_options=None, verbose=False):
         """
         Gets datalinks associated to the provided identifiers (TAP+ only).
diff --git a/astroquery/esa/euclid/tests/data/get_spectrum_output.zip b/astroquery/esa/euclid/tests/data/get_spectrum_output.zip
diff --git a/astroquery/esa/euclid/tests/test_euclidtap.py b/astroquery/esa/euclid/tests/test_euclidtap.py
@@ -49,6 +49,8 @@
 PRODUCT_LIST_FILE_NAME = get_pkg_data_filename(os.path.join("data", 'test_get_product_list.vot'), package=package)
 TEST_GET_PRODUCT_LIST = Path(PRODUCT_LIST_FILE_NAME).read_text()
 
+MULTPLE_GET_SPECTRUM = get_pkg_data_filename(os.path.join("data", 'get_spectrum_output.zip'), package=package)
+
 
 def make_table_metadata(table_name, ra, dec):
     tap_table = TapTableMeta()
@@ -1276,7 +1278,16 @@ def test_get_cutout_exceptions_2(mock_load_data, caplog):
     assert caplog.records[1].msg == mssg
 
 
-def test_get_spectrum(tmp_path_factory, capsys):
+@pytest.mark.filterwarnings('ignore:')
+@patch.object(TapPlus, 'load_data')
+def test_get_spectrum(mock_load_data, tmp_path_factory, capsys):
+    def _fake_load_data(*args, **kwargs):
+        output_file = kwargs.get("output_file")
+        shutil.copy2(MULTPLE_GET_SPECTRUM, Path(output_file))
+        return None
+
+    mock_load_data.side_effect = _fake_load_data
+
     conn_handler = DummyConnHandler()
     tap_plus = TapPlus(url="http://test:1111/tap", data_context='data', client_id='ASTROQUERY',
                        connhandler=conn_handler)
@@ -1300,10 +1311,10 @@ def test_get_spectrum(tmp_path_factory, capsys):
 
     remove_temp_dir()
 
-    fits_file = os.path.join(tmp_path_factory.mktemp("euclid_tmp"), 'my_fits_file.fits')
+    fits_file = os.path.join(tmp_path_factory.mktemp("euclid_tmp"), 'my_fits_file.zip')
 
     result = tap.get_spectrum(ids='2417660845403252054', schema='sedm_sc8', output_file=fits_file)
-    assert os.path.exists(fits_file + '.zip')
+    assert os.path.exists(fits_file)
 
     assert result is not None
 
@@ -1315,8 +1326,9 @@ def test_get_spectrum(tmp_path_factory, capsys):
 
     captured = capsys.readouterr()
 
-    file_path = captured.out.splitlines()[0].replace('Spectra output file: ', '')
+    file_path = captured.out.splitlines()[1].replace('Spectra output file: ', '')
     assert os.path.exists(file_path)
+    assert os.path.basename(file_path) == 'get_spectrum_output.zip'
 
     remove_temp_dir()
 
@@ -1329,7 +1341,7 @@ def test_get_spectrum(tmp_path_factory, capsys):
 
     remove_temp_dir()
 
-    fits_file = os.path.join(tmp_path_factory.mktemp("euclid_tmp"), 'my_fits_file.fits')
+    fits_file = os.path.join(tmp_path_factory.mktemp("euclid_tmp"), 'my_fits_file.zip')
 
     result = tap.get_spectrum(ids='1499442653027920313123456789', schema='sedm_sc8', linking_parameter="SOURCEPATCH_ID",
                               output_file=fits_file)
@@ -1338,6 +1350,45 @@ def test_get_spectrum(tmp_path_factory, capsys):
 
     remove_temp_dir()
 
+    # Multiple files
+
+    result = tap.get_spectrum(ids=['1499442653027920313', '1500431128027836270'], schema='sedm_sc8', output_file=None,
+                              verbose=True)
+
+    assert result is not None
+    assert len(result) == 2
+
+    captured = capsys.readouterr()
+
+    file_path = captured.out.splitlines()[1].replace('Spectra output file: ', '')
+    assert os.path.exists(file_path)
+    assert os.path.basename(file_path) == 'get_spectrum_output.zip'
+
+    remove_temp_dir()
+
+    result = tap.get_spectrum(ids='1499442653027920313,1500431128027836270', schema='sedm_sc8', output_file=None,
+                              verbose=True)
+
+    assert result is not None
+    assert len(result) == 2
+
+    captured = capsys.readouterr()
+
+    file_path = captured.out.splitlines()[1].replace('Spectra output file: ', '')
+    assert os.path.exists(file_path)
+    assert os.path.basename(file_path) == 'get_spectrum_output.zip'
+
+    remove_temp_dir()
+
+    ids = ['sedm 1499442653027920313'] * 2000
+
+    with pytest.raises(ValueError, match="Invalid number of ids:  2000 > 1000"):
+        tap.get_spectrum(ids=ids, schema='sedm_sc8', output_file=None, verbose=True)
+
+    message = "Missing data release in: ids = 1499442653027920313,1500431128027836270 and schema = None"
+    with pytest.raises(ValueError, match=message):
+        tap.get_spectrum(ids='1499442653027920313,1500431128027836270', schema=None, output_file=None, verbose=True)
+
 
 @patch.object(TapPlus, 'load_data')
 def test_get_spectrum_exceptions_2(mock_load_data, caplog):
@@ -1385,7 +1436,7 @@ def test_get_spectrum_exceptions():
     with pytest.raises(ValueError, match="Missing required argument"):
         tap.get_spectrum(ids=None, schema='sedm_sc8', output_file=None)
 
-    with pytest.raises(ValueError, match="Missing required argument"):
+    with pytest.raises(ValueError, match="Missing data release in: ids = 2417660845403252054 and schema = None "):
         tap.get_spectrum(ids='2417660845403252054', schema=None, output_file=None)
 
     with pytest.raises(ValueError, match=(