Skip to content

Commit 53d3900

Browse files
authored
Merge pull request #3 from snbianco/catalogs-query
Query Methods for new Catalogs
2 parents 4b5736a + e1017ad commit 53d3900

File tree

2 files changed

+851
-38
lines changed

2 files changed

+851
-38
lines changed

astroquery/mast/catalog_collection.py

Lines changed: 79 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,25 @@
1010

1111
__all__ = ['CatalogCollection']
1212

13+
DEFAULT_CATALOGS = {
14+
'caom': 'dbo.obspointing',
15+
'gaiadr3': 'dbo.gaia_source',
16+
'hsc': 'dbo.SumMagAper2CatView',
17+
'hscv2': 'dbo.SumMagAper2CatView',
18+
'missionmast': 'dbo.hst_science_missionmast',
19+
'ps1dr1': 'dbo.MeanObjectView',
20+
'ps1dr2': 'dbo.MeanObjectView',
21+
'ps1_dr2': 'ps1_dr2.forced_mean_object',
22+
'skymapperdr4': 'dr4.master',
23+
'tic': 'dbo.CatalogRecord',
24+
'classy': 'dbo.targets',
25+
'ullyses': 'dbo.sciencemetadata',
26+
'goods': 'dbo.goods_master_view',
27+
'3dhst': 'dbo.HLSP_3DHST_summary',
28+
'candels': 'dbo.candels_master_view',
29+
'deepspace': 'dbo.DeepSpace_Summary'
30+
}
31+
1332

1433
@dataclass
1534
class CatalogMetadata:
@@ -76,7 +95,7 @@ def get_catalog_metadata(self, catalog):
7695
return self._catalog_metadata_cache[catalog]
7796

7897
# Verify catalog validity for this collection
79-
self._verify_catalog(catalog)
98+
catalog = self._verify_catalog(catalog)
8099

81100
# Get column metadata
82101
metadata = self._get_column_metadata(catalog)
@@ -115,6 +134,10 @@ def get_default_catalog(self):
115134
str
116135
The default catalog name.
117136
"""
137+
# Check if collection has a known default catalog
138+
if self.name in DEFAULT_CATALOGS:
139+
return DEFAULT_CATALOGS[self.name]
140+
118141
# Pick default catalog = first one that does NOT start with "tap_schema."
119142
default_catalog = next((c for c in self.catalog_names if not c.startswith("tap_schema.")), None)
120143

@@ -188,29 +211,63 @@ def _get_adql_supported_functions(self):
188211

189212
def _verify_catalog(self, catalog):
190213
"""
191-
Verify that the specified catalog is valid for the given collection.
214+
Verify that the specified catalog is valid for this collection and return the correct catalog name.
215+
Raises an error if the catalog is not valid.
192216
193217
Parameters
194218
----------
195219
catalog : str
196220
The catalog to be verified.
197221
222+
Returns
223+
-------
224+
str
225+
The validated catalog name.
226+
198227
Raises
199228
------
200229
InvalidQueryError
201230
If the specified catalog is not valid for the given collection.
202231
"""
203-
lower_map = {name.lower(): name for name in self.catalog_names}
204-
if catalog.lower() not in lower_map:
205-
closest_match = difflib.get_close_matches(catalog, self.catalog_names, n=1)
206-
error_msg = (
207-
f"Catalog '{catalog}' is not recognized for collection '{self.name}'. "
208-
f"Did you mean '{closest_match[0]}'?"
209-
if closest_match
210-
else f"Catalog '{catalog}' is not recognized for collection '{self.name}'."
232+
catalog = catalog.lower()
233+
234+
# Build a mapping for case-insensitive and no-prefix lookup
235+
lookup = {}
236+
no_prefix_map = {}
237+
for cat in self.catalog_names:
238+
cat_lower = cat.lower()
239+
lookup[cat_lower] = cat # case-insensitive match
240+
no_prefix = cat_lower.split('.')[-1]
241+
if no_prefix not in no_prefix_map:
242+
no_prefix_map[no_prefix] = [cat] # no-prefix match (first occurrence)
243+
else:
244+
no_prefix_map[no_prefix].append(cat)
245+
246+
# Add unambiguous no-prefix matches to lookup
247+
for no_prefix, cats in no_prefix_map.items():
248+
if len(cats) == 1:
249+
lookup[no_prefix] = cats[0]
250+
251+
# Direct or unambiguous no-prefix match
252+
if catalog in lookup:
253+
return lookup[catalog]
254+
255+
# Check for ambiguous no-prefix matches
256+
if catalog in no_prefix_map and len(no_prefix_map[catalog]) > 1:
257+
matches = ', '.join(no_prefix_map[catalog])
258+
raise InvalidQueryError(
259+
f"Catalog '{catalog}' is ambiguous for collection '{self.name}'. "
260+
f"It matches multiple catalogs: {matches}. Please specify the full catalog name."
211261
)
212-
error_msg += " Available catalogs are: " + ", ".join(self.catalog_names)
213-
raise InvalidQueryError(error_msg)
262+
263+
# Suggest closest match (based on full catalog names)
264+
closest = difflib.get_close_matches(catalog, self.catalog_names, n=1)
265+
suggestion = f" Did you mean '{closest[0]}'?" if closest else ""
266+
267+
raise InvalidQueryError(
268+
f"Catalog '{catalog}' is not recognized for collection '{self.name}'."
269+
f"{suggestion} Available catalogs are: {', '.join(self.catalog_names)}"
270+
)
214271

215272
def _get_column_metadata(self, catalog):
216273
"""
@@ -229,7 +286,9 @@ def _get_column_metadata(self, catalog):
229286
log.debug(f"Fetching column metadata for collection '{self.name}', catalog '{catalog}' from MAST TAP service.")
230287

231288
# Case-insensitive match to find the table
232-
tap_table = next((t for name, t in self.tap_service.tables.items() if name.lower() == catalog.lower()), None)
289+
tap_table = next((t for name, t in self.tap_service.tables.items() if name == catalog), None)
290+
if tap_table is None:
291+
raise InvalidQueryError(f"Catalog '{catalog}' not found in collection '{self.name}'.")
233292

234293
# Extract column metadata
235294
col_names = [col.name for col in tap_table.columns]
@@ -295,17 +354,15 @@ def _verify_criteria(self, catalog, **criteria):
295354
"""
296355
if not criteria:
297356
return
298-
self._verify_catalog(catalog)
299-
col_names = list(self.get_catalog_metadata(catalog)['column_metadata']['name'])
357+
col_names = list(self.get_catalog_metadata(catalog).column_metadata['name'])
300358

301359
# Check each criteria argument for validity
302360
for kwd in criteria.keys():
303361
if kwd not in col_names:
304-
closest_match = difflib.get_close_matches(kwd, col_names, n=1)
305-
error_msg = (
306-
f"Filter '{kwd}' is not recognized for collection '{self.name}' and catalog '{catalog}'. "
307-
f"Did you mean '{closest_match[0]}'?"
308-
if closest_match
309-
else f"Filter '{kwd}' is not recognized for collection '{self.name}' and catalog '{catalog}'."
362+
# Suggest closest match for invalid keyword
363+
closest = difflib.get_close_matches(kwd, col_names, n=1)
364+
suggestion = f" Did you mean '{closest[0]}'?" if closest else ""
365+
raise InvalidQueryError(
366+
f"Filter '{kwd}' is not recognized for collection '{self.name}' and "
367+
f"catalog '{catalog}'.{suggestion}"
310368
)
311-
raise InvalidQueryError(error_msg)

0 commit comments

Comments
 (0)