99import binascii
1010import os
1111import pprint
12+ import re
13+ import stat
1214import tarfile
1315import zipfile
1416from collections .abc import Iterable
1517from datetime import datetime
18+ from datetime import timezone
1619
1720from astropy import units
1821from astropy import units as u
1922from astropy .coordinates import Angle
23+ from astropy .io import fits
24+ from astropy .table import Table
2025from astropy .units import Quantity
2126from astropy .utils import deprecated_renamed_argument
2227from requests .exceptions import HTTPError
@@ -45,6 +50,7 @@ class EuclidClass(TapPlus):
4550
4651 __VALID_DATALINK_RETRIEVAL_TYPES = conf .VALID_DATALINK_RETRIEVAL_TYPES
4752 __VALID_LINKING_PARAMETERS = conf .VALID_LINKING_PARAMETERS
53+ __regex_designation = re .compile (r"\s*(\S+)\s(-?\d+)\s*" , flags = re .MULTILINE | re .UNICODE )
4854
4955 def __init__ (self , * , environment = 'PDR' , tap_plus_conn_handler = None , datalink_handler = None , cutout_handler = None ,
5056 verbose = False , show_server_messages = True ):
@@ -1517,15 +1523,15 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
15171523
15181524 The spectrum associated with the source_id is downloaded as a compressed fits file, and the files it contains
15191525 are returned in a list. The compressed fits file is saved in the local path given by output_file. If this
1520- parameter is not set, the result is saved in the file "<working
1521- directory>/temp_<%Y%m%d_%H%M%S>/<source_id>.fits.zip". In any case, the content of the zip file is
1522- automatically extracted.
1526+ parameter is not set, the result is saved in the file "<working directory>/temp_<%Y%m%d_%H%M%S>/
1527+ get_spectrum_output.zip". In any case, the content of the zip file is automatically extracted.
15231528
15241529 Parameters
15251530 ----------
1526- ids : str or int, mandatory
1527- identifier for the spectrum
1528- schema : str, mandatory, default 'sedm'
1531+ ids : str, int, str list or int list, mandatory
1532+ The identifier (<source_id>) or designation (<data-release>+blank+<source_id>). Can be a single designation
1533+ or id, a string with multiple values separated by commas, or a list.
1534+ schema : str, optional, default 'sedm'
15291535 the data release
15301536 retrieval_type : str, optional, default 'ALL' to retrieve all data from the list of sources
15311537 retrieval type identifier. Possible values are: 'SPECTRA_BGS' for the blue spectrum and 'SPECTRA_RGS' for
@@ -1536,7 +1542,7 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
15361542 SOURCEPATCH_ID: the identifiers are considered as sourcepatch_id
15371543 output_file : str, optional
15381544 output file name. If no value is provided, a temporary one is created with the name
1539- "<working directory>/temp_<%Y%m%d_%H%M%S>/<source_id>.fits "
1545+ "<working directory>/temp_<%Y%m%d_%H%M%S>/get_spectrum_output.zip "
15401546 verbose : bool, optional, default 'False'
15411547 flag to display information about the process
15421548
@@ -1548,19 +1554,41 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
15481554
15491555 """
15501556
1551- if ids is None or schema is None :
1557+ if ids is None :
1558+ raise ValueError (self .__ERROR_MSG_REQUESTED_GENERIC )
1559+
1560+ if isinstance (ids , (list , tuple )) and not ids :
15521561 raise ValueError (self .__ERROR_MSG_REQUESTED_GENERIC )
15531562
15541563 rt = str (retrieval_type ).upper ()
15551564 if rt != 'ALL' and rt not in self .__VALID_DATALINK_RETRIEVAL_TYPES :
15561565 raise ValueError (f"Invalid argument value for 'retrieval_type'. Found { retrieval_type } , "
15571566 f"expected: 'ALL' or any of { self .__VALID_DATALINK_RETRIEVAL_TYPES } " )
15581567
1559- params_dict = {}
1568+ max_allow_elements = conf .SPECTRA_LIMIT
1569+ max_elements = 1
1570+ if isinstance (ids , str ):
1571+ ids_arg = ids
1572+ if ',' in ids :
1573+ max_elements = ids .count (',' )
1574+ elif isinstance (ids , int ):
1575+ ids_arg = str (ids )
1576+ elif isinstance (ids , (list , tuple )):
1577+ max_elements = len (ids )
1578+ ids_arg = ',' .join (str (item ) for item in ids )
1579+ else :
1580+ raise ValueError (self .__ERROR_MSG_REQUESTED_GENERIC )
1581+
1582+ if not self .__regex_designation .search (ids_arg ) and schema is None :
1583+ raise ValueError (f"Missing data release in: ids = { ids_arg } and schema = { schema } " )
1584+
1585+ if max_elements > max_allow_elements :
1586+ raise ValueError (f"Invalid number of ids: { max_elements } > { max_allow_elements } " )
15601587
1561- id_value = """{schema} {source_id}""" .format (** {'schema' : schema , 'source_id' : ids })
1562- params_dict ['ID' ] = id_value
1563- params_dict ['SCHEMA' ] = schema
1588+ params_dict = {}
1589+ params_dict ['ID' ] = ids_arg
1590+ if schema is not None :
1591+ params_dict ['RELEASE' ] = schema
15641592 params_dict ['RETRIEVAL_TYPE' ] = str (retrieval_type )
15651593 params_dict ['USE_ZIP_ALWAYS' ] = 'true'
15661594 params_dict ['TAPCLIENT' ] = 'ASTROQUERY'
@@ -1573,49 +1601,108 @@ def get_spectrum(self, *, ids, schema='sedm', retrieval_type="ALL", linking_para
15731601 if linking_parameter != 'SOURCE_ID' :
15741602 params_dict ['LINKING_PARAMETER' ] = linking_parameter
15751603
1576- fits_file = ids + '.fits.zip'
1577-
1578- if output_file is not None :
1579- if not output_file .endswith ('.zip' ):
1580- output_file = output_file + '.zip'
1581-
1582- if os .path .dirname (output_file ) == '' :
1583- output_file = os .path .join (os .getcwd (), output_file )
1584-
1585- if verbose :
1586- print (f"Spectra output file: { output_file } " )
1587-
1588- output_file_full_path , output_dir = self .__set_dirs (output_file = output_file , observation_id = fits_file )
1604+ if output_file is None :
1605+ now = datetime .now (timezone .utc )
1606+ now_formatted = now .strftime ("%Y%m%d_%H%M%S.%f" )
1607+ path = os .path .join (os .getcwd (), "temp_" + now_formatted )
1608+ download_name_formatted = 'get_spectrum_output.zip'
1609+ output_file = os .path .join (path , download_name_formatted )
1610+ else :
1611+ path = os .path .dirname (output_file )
1612+ if path == '' :
1613+ path = os .getcwd ()
1614+ output_file = os .path .join (path , output_file )
15891615
15901616 if verbose :
1591- print ("Spectra output file: " + output_file_full_path )
1617+ print (f "Spectra output file: { output_file } " )
15921618
1593- if os .listdir (output_dir ):
1594- raise IOError (f'The directory is not empty: { output_dir } ' )
1595-
1596- files = []
1619+ if not os .path .exists (path ):
1620+ try :
1621+ os .mkdir (path )
1622+ except FileExistsError :
1623+ log .debug ("Path %s already exist" % path )
1624+ except OSError :
1625+ log .error ("Creation of the directory %s failed" % path )
15971626
15981627 try :
1599- self .__eucliddata .load_data (params_dict = params_dict , output_file = output_file_full_path , verbose = verbose )
1628+ self .__eucliddata .load_data (params_dict = params_dict , output_file = output_file , verbose = verbose )
16001629 except HTTPError as err :
1601- log .error (f'Cannot retrieve spectrum for source_id { ids } , schema { schema } . HTTP error: { err } ' )
1630+ log .error (f'Cannot retrieve spectrum for source_id { ids_arg } , schema { schema } . HTTP error: { err } ' )
16021631 return None
16031632 except Exception as exx :
1604- log .error (f'Cannot retrieve spectrum for source_id { ids } , schema { schema } : { str (exx )} ' )
1633+ log .error (f'Cannot retrieve spectrum for source_id { ids_arg } , schema { schema } : { str (exx )} ' )
16051634 return None
16061635
1607- self . __extract_file ( output_file_full_path = output_file_full_path , output_dir = output_dir , files = files )
1608-
1609- if files :
1610- return files
1636+ try :
1637+ files = EuclidClass . __get_data_files ( output_file = output_file , path = path )
1638+ except Exception as err :
1639+ raise err
16111640
1612- self . __check_file_number ( output_dir = output_dir ,
1613- output_file_name = os . path . basename ( output_file_full_path ),
1614- output_file_full_path = output_file_full_path ,
1615- files = files )
1641+ if log . isEnabledFor ( 20 ):
1642+ log . debug ( "List of products available:" )
1643+ for item in sorted ([ key for key in files . keys ()]):
1644+ log . debug ( "Product = " + item )
16161645
16171646 return files
16181647
@staticmethod
def __remove_readonly(func, path, _):
    """Make ``path`` writable and call ``func`` on it again.

    The three-argument signature matches an error callback of the kind accepted by
    ``shutil.rmtree`` (presumably how it is used; no caller is visible here).

    Parameters
    ----------
    func : callable
        the function to call again with ``path`` once the permissions are changed
    path : str
        the file-system entry whose permissions are changed
    _ : object
        ignored
    """
    writable_mode = stat.S_IWRITE
    os.chmod(path, writable_mode)
    func(path)
1653+
@staticmethod
def __get_data_files(output_file, path):
    """Extract a zip archive and read the FITS tables it contains.

    Parameters
    ----------
    output_file : str
        path of the zip archive to be extracted
    path : str
        directory where the content of the archive is extracted

    Returns
    -------
    A dictionary with one `~astropy.table.Table` per HDU whose NAXIS header value is not zero, read from
    every non-empty '.fits' file of the archive. The keys have the form
    '<value of the SOURC_ID header>_<file name>'.
    """
    # Take the location of every extracted file from the extraction itself. Walking 'path' afterwards
    # would miss the members stored in sub-folders of the archive (their archive names contain the folder)
    # and could pick up unrelated files under 'path' that share a name with an archive member.
    files = {}
    with zipfile.ZipFile(output_file, "r") as archive:
        for member in archive.infolist():
            extracted_path = archive.extract(member, path)
            if not member.is_dir():
                files[os.path.basename(extracted_path)] = extracted_path

    result = {}
    for file_name, file_path in files.items():
        # Only non-empty FITS files are parsed
        if not file_name.endswith('.fits') or os.path.getsize(file_path) == 0:
            continue

        # if memmap = True, another handle to the FITS file is opened by mmap.
        # See https://docs.astropy.org/en/latest/io/fits/index.html
        with fits.open(file_path, memmap=False) as hdu_list:
            for hdu in hdu_list:
                # HDUs without data axes cannot be read as a table
                if hdu.header['NAXIS'] == 0:
                    continue
                table = Table.read(hdu, format='fits')
                EuclidClass.correct_table_units(table)
                result[str(hdu.header['SOURC_ID']) + '_' + file_name] = table

    return result
1684+
@staticmethod
def correct_table_units(table):
    """Normalise, in place, the unit of every column of a table.

    Parameters
    ----------
    table : `~astropy.table.Table`, mandatory
        table whose column units are rewritten: '.' is replaced by ' ' and "'" is removed. Units that are
        instances of ``UnrecognizedUnit`` are parsed again from the rewritten name; when that fails the
        column is left untouched. Units stored as plain strings are replaced by the rewritten string.
    """

    def _rewrite(unit_text):
        # Same substitution for both kinds of unit: dots become blanks, apostrophes are dropped
        return unit_text.replace(".", " ").replace("'", "")

    for column_name in table.colnames:
        column = table[column_name]
        current_unit = column.unit

        if isinstance(current_unit, u.UnrecognizedUnit):
            try:
                column.unit = u.Unit(_rewrite(current_unit.name))
            except Exception:
                # Best effort: keep the original unit when the rewritten name cannot be parsed
                pass
            continue

        if isinstance(current_unit, str):
            column.unit = _rewrite(current_unit)
1705+
16191706 def get_datalinks (self , ids , * , linking_parameter = 'SOURCE_ID' , extra_options = None , verbose = False ):
16201707 """
16211708 Gets datalinks associated to the provided identifiers (TAP+ only).
0 commit comments