Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
New Tools and Services
----------------------



API changes
-----------

Expand Down Expand Up @@ -35,6 +33,7 @@ heasarc
- Add ``query_constraints`` to allow querying of different catalog columns. [#3403]
- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403]
- Add automatic guessing for the data host in ``download_data``. [#3403]
- Adding method heasarc.query_all(). [#3499]

gaia
^^^^
Expand Down
266 changes: 265 additions & 1 deletion astroquery/heasarc/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from astropy import coordinates
from astropy import units as u
from astropy.utils.decorators import deprecated, deprecated_renamed_argument

from astropy.time import Time
import pyvo

from astroquery import log
Expand Down Expand Up @@ -612,6 +612,270 @@ def query_object(self, object_name, mission, *,
return self.query_region(pos, catalog=mission, spatial='cone',
get_query_payload=get_query_payload)

def _get_vector(ra=None, dec=None):
"""
If the input is a string name of a column like "a.ra", then this routine
constructs the unit vector column names that can be added to the SQL query
to represent the unit vector. If the input is a number, then it will actually
calculate the unit vector and return the values as strings to be added to the
SQL query.

The former is used to fetch pre-computed unit vectors columns associated with
the table being queried. The latter is used to compute the input position unit
vector only once and put the numeric value in the query constraint.
"""
# Note that Astropy flips x and y compared to this, which is used internally
# despite the fact that our RA, DEC values are in ICRS.
try:
r, d = np.radians([float(ra), float(dec)])
return (
np.cos(d) * np.sin(r),
np.cos(d) * np.cos(r),
np.sin(d)
)
except ValueError:
prefix = ra.split('.')[0] # e.g., 'a' from 'a.ra'
return (f"{prefix}.__x_ra_dec", f"{prefix}.__y_ra_dec", f"{prefix}.__z_ra_dec")
except Exception as e:
raise e

def _fast_geometry_constraint(ra, dec, large=False, radius=None):
"""
Construct the spatial constraint to be added to the WHERE clause. It compares
the input position with the catalog's pre-computed unit vector columns
with the computation optimized for speed. The optimization was done by Tom McGlynn
for the Xamin GUI and the algorithm copied here.

The master position tables are split into those where the default sensible search
radius is larger or smaller than 1 degree.
"""
vec0 = HeasarcClass._get_vector("a.ra", "a.dec")
vec1 = HeasarcClass._get_vector(ra, dec)
dot_product = " + ".join([f"{vec0[i]}*{vec1[i]}" for i in range(3)])
if radius is not None:
if not isinstance(radius, (int, float)):
radius = radius.value
radius_condition = f"{dot_product} > (cos(radians(({radius}))))"
dec_condition = f"a.dec between {dec} - {radius} and {dec} + {radius}"
else:
# Assuming 'a.dsr' is the default search radius column in degrees. This value is
# defined by HEASARC curators for each table.
radius_condition = f"{dot_product} > (cos(radians((a.dsr))))"
dec_condition = f"a.dec between {dec} - a.dsr and {dec} + a.dsr"
if large:
return f"""
( ({radius_condition})
and ({dec_condition}) )
"""
else:
# Additional constraints on tables with search radii less than 1 deg,
# which speeds up the whole thing.
radius_condition_1deg = f"{dot_product} > {np.cos(np.radians(1.0))}"
dec_condition_1deg = f"a.dec between {float(dec) - 1} and {float(dec)+1}"
return f"""
( ({radius_condition})
and ({dec_condition})
and ({radius_condition_1deg})
and ({dec_condition_1deg})
)
"""

def _time_constraint(start_time=None, end_time=None):
""""
Converts input string like "2025-01-02T01:00:00..2025-01-05T23:59:59"
into a decimal MJD constraint.
"""
start_mjd = Time(start_time, format='isot').mjd
end_mjd = Time(end_time, format='isot').mjd
return f"end_time > {start_mjd:.6f} AND start_time < {end_mjd:.6f}"

def _query_matches(ra=None, dec=None, start_time=None, end_time=None, radius=None):
"""
Constructs the full SQL query including the spatial and time constraints.
Note that this queries multiple tables, as the HEASARC database has split
the master tables for efficiency.
"""
offset_def = ''
if ra is not None:
constraint_small = HeasarcClass._fast_geometry_constraint(ra, dec, large=False, radius=radius)
constraint_big = HeasarcClass._fast_geometry_constraint(ra, dec, large=True, radius=radius)
# Note that at least in HEASARC implementation, using DISTANCE in a column
# definition is fine, but it's very slow in a WHERE clause.
offset_def = f''', MAX(DISTANCE(POINT('ICRS', a.ra, a.dec),
POINT('ICRS', {ra}, {dec}))) as max_offset_deg
'''

if start_time is not None:
constraint_time = HeasarcClass._time_constraint(start_time, end_time)

tname1, tname2 = None, None
if ra is not None and start_time is None:
tname1, tname2 = 'pos_small', 'pos_big'
elif ra is not None and start_time is not None:
tname1, tname2 = 'pos_time_small', 'pos_time_big'
constraint_small += f" AND {constraint_time}"
constraint_big += f" AND {constraint_time}"
elif ra is None and start_time is not None:
tname1 = 'time'
else:
raise ValueError("You must specify either a position or time range or both")

# These columns are only in b, so can remove the b.column
select_block = f'''
SELECT table_name, count(*) AS count, b.description,
b.regime, b.mission, b.type AS obj_type{offset_def}
'''

groupby_block = "GROUP BY table_name, b.description, b.regime, b.mission, b.type"

if ra is not None:
full_query = f'''
{select_block}
FROM master_table.{tname1} AS a,
master_table.indexview AS b
WHERE a.table_name = b.name AND {constraint_small}
{groupby_block}
UNION ALL
{select_block}
FROM master_table.{tname2} AS a,
master_table.indexview AS b
WHERE a.table_name = b.name AND {constraint_big}
{groupby_block}
ORDER BY count DESC
'''
else:
full_query = f'''
{select_block}
FROM master_table.{tname1} AS a, master_table.indexview AS b
WHERE a.table_name = b.name AND {constraint_time}
{groupby_block}
ORDER BY count DESC
'''

# rename for readability
full_query = f"""
select r.table_name as table_name, r.count as count, r.description as description,
r.regime as regime, r.mission as mission, r.obj_type as obj_type,
r.max_offset_deg as max_offset_deg from ({full_query}) as r"""

# Join all parts of the query, ensuring simple spacing
return HeasarcClass._fix_sql_whitespace(full_query)

def _fix_sql_whitespace(insql):
import re
return re.sub(r'\s+', ' ', insql).replace("\n", " ").strip()

def _set_print_formats(table):
"""
Set the Astropy format (e.g., '.5f' or '.3e') so that
the columns look sensible.
"""
for colname in table.columns:
col = table[colname]
if col.dtype.kind not in 'f':
continue
if (abs(col.min()) < 1e-10 and abs(col.min()) > 0.0) or abs(col.max() > 1e10):
col.format = "%10e"
else:
col.format = "%10f"
return (table)

def query_all(self, position=None, get_query_payload=False, start_time=None,
end_time=None, verbose=False, maxrec=None, radius=None):
"""
Query the HEASARC database to count matches at a given position for all available catalogs.

Parameters
----------
position : str, `astropy.coordinates` object
The position around which to search. Must be a SkyCoord object or a string
that Astropy can convert.
start_time : str, `astropy.time` object, optional
Beginning of time range of interest as a string in ISOT format
or Time object.
end_time : str, `astropy.time` object, optional
End of time range of interest as a string in ISOT format
or Time object.
get_query_payload : bool, optional, optional
If `True` then returns the generated ADQL query as str and does not send the query.
Defaults to `False`.
radius : str or `~astropy.units.Quantity` object, optional
If this radius is None, the specified coordinate is compared to each mission
catalog entry using that catalog's default radius. (See get_default_radius().)
This is based on the approximate location uncertainty for each mission. If you
specify a radius in degrees, it uses that instead.
verbose : bool, optional
If True, prints additional information about the query. Default is False.
maxrec : int, optional
The maximum number of records to return. If None, all matching records are returned up to the server limit.
**kwargs : dict, optional
Additional keyword arguments:

Returns
-------
result : `~astropy.table.Table`
A table containing the results of the query, i.e. a list of catalogs
that have entries near the specified position, how many, and quick catalog
information included the computed offset between the two positions in degrees.
If no results are found, an empty table is returned and a warning is issued.

Raises
------
ValueError
If the position is not provided or is not a SkyCoord object.

Notes
-----
This method queries all HEASARC catalogs for sources near the specified position.
The results include the table name, number of matches, table description, regime,
mission, and object type for each catalog.

By default, the search radius foreach table is adjusted according to the positional
accuracy in that table. This gives you results most likely to be relevant to your
search. But if you specify a radius, that will be used in all catalogs.
Be aware that for missions with large uncertainties, e.g., 40 degrees for hete2,
when you search within a very small radius, you may not find some relevant catalog
entries that might be of interest. And conversely, if you specify a larger
radius than the default, you will get more results further from the position specified.

The user can then select the table name(s) of interest and use the query_object(),
query_region(), etc.

The query uses the HEASARC TAP service to search position-only master tables efficiently.

"""
if position is not None:
coords_icrs = parse_coordinates(position).icrs
ra, dec = coords_icrs.ra.deg, coords_icrs.dec.deg
elif position is None and start_time is not None:
ra = None
dec = None
elif ((position is None and start_time is None)):
raise ValueError("A valid position and/or a time range must be provided.")

full_query = HeasarcClass._query_matches(ra=ra, dec=dec,
start_time=start_time,
end_time=end_time, radius=radius)

if get_query_payload:
return full_query

response = self.query_tap(query=full_query, maxrec=maxrec)

# save the response in case we want to use it later
self._last_result = response

table = response.to_table()
if len(table) == 0:
warnings.warn(
NoResultsWarning("No matching rows were found in the query.")
)
return table

# Because astropy Tables don't keep all the VOTable metadata,
# this prints in more sensible formats and avoids confusion.
return HeasarcClass._set_print_formats(table)

def locate_data(self, query_result=None, catalog_name=None):
"""Get links to data products
Use vo/datalinks to query the data products for some query_results.
Expand Down
Loading
Loading