Fix hexadecimal parsing and docs build failure (#169)

wtbarnes · web-flow · commit 87f56503e4ee · 2026-03-30T13:16:11.000-04:00
* fix hexadecimal parsing

* pandas now seems to convert ints to datetimes

* pandas now seems to convert ints to datetimes

* fix docs failures

* remove datetime test

* code review suggestions from Shane
diff --git a/changelog/169.bugfix.rst b/changelog/169.bugfix.rst
@@ -0,0 +1 @@
+Fixed a bug in parsing of hexadecimal columns.
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
@@ -91,7 +91,7 @@ JSOC time strings can be converted to a naive `~datetime.datetime` representatio
     1   2016-04-01 06:00:00
     2   2016-04-01 12:00:00
     3   2016-04-01 18:00:00
-    Name: T_REC, dtype: datetime64[ns]
+    Name: T_REC, dtype: datetime64[us]
 
 For most of the HMI and MDI data sets, the `TAI <https://en.wikipedia.org/wiki/International_Atomic_Time>`__ time standard is used which, in contrast to `UTC <https://en.wikipedia.org/wiki/Coordinated_Universal_Time>`__, does not make use of any leap seconds.
 The TAI standard is currently not supported by the Python standard libraries.
@@ -210,7 +210,7 @@ Note that :meth:`drms.client.Client.export` performs an ``url_quick`` / ``as-is`
     1    /SUM41/D803708361/S00008/Dopplergram.fits
     2    /SUM71/D803720859/S00008/Dopplergram.fits
     3    /SUM70/D803730119/S00008/Dopplergram.fits
-    Name: filename, dtype: object
+    Name: filename, dtype: str
 
 Download URLs can now be generated using the :attr:`drms.client.ExportRequest.urls` attribute:
 
@@ -225,7 +225,7 @@ The following, for example, only downloads the first file of the request:
 
 .. code-block:: python
 
-    >>> export_request.download(out_dir, index=0)  # doctest: +REMOTE_DATA
+    >>> res = export_request.download(out_dir, index=0)  # doctest: +REMOTE_DATA
 
 Being a direct ``as-is`` export, there are no keyword data written to any FITS headers.
 If you need keyword data added to the headers, you have to use the ``fits`` export protocol instead, which is described below.
diff --git a/drms/client.py b/drms/client.py
@@ -11,6 +11,7 @@
 
 import numpy as np
 import pandas as pd
+from pandas.api.types import is_object_dtype, is_string_dtype
 
 from drms import logger
 from drms.utils import create_request_with_header
@@ -614,10 +615,9 @@ def __init__(self, server="jsoc", *, email=None):
     def __repr__(self):
         return f"<Client: {self._server.name}>"
 
-    def _convert_numeric_keywords(self, ds, kdf, *, skip_conversion=None):
-        si = self.info(ds)
-        int_keys = list(si.keywords[si.keywords.is_integer].index)
-        num_keys = list(si.keywords[si.keywords.is_numeric].index)
+    def _convert_numeric_keywords(self, keywords, kdf, *, skip_conversion=None):
+        int_keys = list(keywords[keywords.is_integer].index)
+        num_keys = list(keywords[keywords.is_numeric].index)
         num_keys += ["*recnum*", "*sunum*", "*size*"]
         if skip_conversion is None:
             skip_conversion = []
@@ -630,11 +630,12 @@ def _convert_numeric_keywords(self, ds, kdf, *, skip_conversion=None):
             # we need a special treatment for integer strings that start
             # with '0x', like QUALITY. The following to_numeric call is
             # still necessary as the results are still Python objects.
-            if k in int_keys and kdf[k].dtype is np.dtype(object):
-                idx = kdf[k].str.startswith("0x")
+            if k in int_keys and (is_object_dtype(kdf[k]) or is_string_dtype(kdf[k])):
+                values = kdf[k].astype(str)
+                idx = values.str.startswith(("0x", "0X"))
                 if idx.any():
-                    k_idx = kdf.columns.get_loc(k)
-                    kdf.loc[idx, kdf.columns[k_idx]] = kdf.loc[idx, kdf.columns[k_idx]].apply(int, base=16)
+                    kdf[k] = kdf[k].astype(object)
+                    kdf.loc[idx, k] = values[idx].apply(int, base=16)
             if k in num_keys:
                 kdf[k] = _pd_to_numeric_coerce(kdf[k])
 
@@ -1029,7 +1030,7 @@ def query(
             else:
                 res_key = pd.DataFrame()
             if convert_numeric:
-                self._convert_numeric_keywords(ds, res_key, skip_conversion=skip_conversion)
+                self._convert_numeric_keywords(self.info(ds).keywords, res_key, skip_conversion=skip_conversion)
             res.append(res_key)
 
         if seg is not None:
diff --git a/drms/tests/test_jsoc_query.py b/drms/tests/test_jsoc_query.py
@@ -1,3 +1,4 @@
+import pandas as pd
 import pytest
 
 import drms
@@ -100,9 +101,20 @@ def test_query_invalid_series(jsoc_client):
     [
         "hmi.v_45s[2014.01.01_00:00:35_TAI-2014.01.01_01:00:35_TAI]",
         "hmi.M_720s[2011.04.14_00:30:00_TAI/6h@2h]",
+        "aia.lev1_euv_12s[2014-01-01T00:00:01Z/365d@1d][335]",
     ],
 )
 def test_query_hexadecimal_strings(query):
     # Exercise the part of client.py that deals with hexadecimal strings
     c = drms.Client()
-    c.query(query, key="**ALL**")
+    result = c.query(query, key=["T_REC", "QUALITY", "CRPIX1", "CRVAL1", "BUNIT"])
+    assert pd.api.types.is_integer_dtype(result["QUALITY"])
+
+
+def test_query_quality_hex_decimal_conversion():
+    c = drms.Client()
+    keywords = pd.DataFrame({"is_integer": [True], "is_numeric": [True]}, index=["QUALITY"])
+    df = pd.DataFrame({"QUALITY": pd.Series(["0x00000000", "0x0000000A", "0X000000FF"], dtype="string")})
+    c._convert_numeric_keywords(keywords, df)
+    assert df["QUALITY"].tolist() == [0, 10, 255]
+    assert pd.api.types.is_integer_dtype(df["QUALITY"])
diff --git a/drms/tests/test_to_datetime.py b/drms/tests/test_to_datetime.py
@@ -82,7 +82,6 @@ def test_time_series(time_series, expected):
     ("2010.05.01_TAI", False),
     ("2010.05.01_00:00_TAI", False),
     ("", True),
-    ("1600", True),
     ("foo", True),
     ("2013.12.21_23:32:34_TAI", False),
 ]
diff --git a/examples/plot_aia_lightcurve.py b/examples/plot_aia_lightcurve.py
@@ -40,9 +40,10 @@
 print("Querying series info...")
 series_info = client.info("aia.lev1_euv_12s")
 series_info_lev1 = client.info("aia.lev1")
+linkinfo = series_info.keywords.linkinfo
+missing_linkinfo = linkinfo.isna()
 for key in keys:
-    linkinfo = series_info.keywords.loc[key].linkinfo
-    if linkinfo is not None and linkinfo.startswith("lev1->"):
+    if not missing_linkinfo.loc[key] and linkinfo.loc[key].startswith("lev1->"):
         note_str = series_info_lev1.keywords.loc[key].note
     else:
         note_str = series_info.keywords.loc[key].note

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Fixed a bug in parsing of hexadecimal columns.`
Original file line number	Diff line number	Diff line change
`@@ -82,7 +82,6 @@ def test_time_series(time_series, expected):`
`82`	`82`	`("2010.05.01_TAI", False),`
`83`	`83`	`("2010.05.01_00:00_TAI", False),`
`84`	`84`	`("", True),`
`85`		`- ("1600", True),`
`86`	`85`	`("foo", True),`
`87`	`86`	`("2013.12.21_23:32:34_TAI", False),`
`88`	`87`	`]`