Skip to content

Commit 87f5650

Browse files
authored
Fix hexadecimal parsing and docsbuild failure (#169)
* fix hexadecimal parsing
* pandas now seems to convert ints to datetimes
* pandas now seems to convert ints to datetimes
* fix docs failures
* remove datetime test
* code review suggestions from Shane
1 parent 267a612 commit 87f5650

File tree

6 files changed

+30
-16
lines changed

6 files changed

+30
-16
lines changed

changelog/169.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed a bug in parsing of hexadecimal columns.

docs/tutorial.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ JSOC time strings can be converted to a naive `~datetime.datetime` representatio
9191
1 2016-04-01 06:00:00
9292
2 2016-04-01 12:00:00
9393
3 2016-04-01 18:00:00
94-
Name: T_REC, dtype: datetime64[ns]
94+
Name: T_REC, dtype: datetime64[us]
9595
9696
For most of the HMI and MDI data sets, the `TAI <https://en.wikipedia.org/wiki/International_Atomic_Time>`__ time standard is used which, in contrast to `UTC <https://en.wikipedia.org/wiki/Coordinated_Universal_Time>`__, does not make use of any leap seconds.
9797
The TAI standard is currently not supported by the Python standard libraries.
@@ -210,7 +210,7 @@ Note that :meth:`drms.client.Client.export` performs an ``url_quick`` / ``as-is`
210210
1 /SUM41/D803708361/S00008/Dopplergram.fits
211211
2 /SUM71/D803720859/S00008/Dopplergram.fits
212212
3 /SUM70/D803730119/S00008/Dopplergram.fits
213-
Name: filename, dtype: object
213+
Name: filename, dtype: str
214214
215215
Download URLs can now be generated using the :attr:`drms.client.ExportRequest.urls` attribute:
216216

@@ -225,7 +225,7 @@ The following, for example, only downloads the first file of the request:
225225

226226
.. code-block:: python
227227
228-
>>> export_request.download(out_dir, index=0) # doctest: +REMOTE_DATA
228+
>>> res = export_request.download(out_dir, index=0) # doctest: +REMOTE_DATA
229229
230230
Being a direct ``as-is`` export, there are no keyword data written to any FITS headers.
231231
If you need keyword data added to the headers, you have to use the ``fits`` export protocol instead, which is described below.

drms/client.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import numpy as np
1313
import pandas as pd
14+
from pandas.api.types import is_object_dtype, is_string_dtype
1415

1516
from drms import logger
1617
from drms.utils import create_request_with_header
@@ -614,10 +615,9 @@ def __init__(self, server="jsoc", *, email=None):
614615
def __repr__(self):
615616
return f"<Client: {self._server.name}>"
616617

617-
def _convert_numeric_keywords(self, ds, kdf, *, skip_conversion=None):
618-
si = self.info(ds)
619-
int_keys = list(si.keywords[si.keywords.is_integer].index)
620-
num_keys = list(si.keywords[si.keywords.is_numeric].index)
618+
def _convert_numeric_keywords(self, keywords, kdf, *, skip_conversion=None):
619+
int_keys = list(keywords[keywords.is_integer].index)
620+
num_keys = list(keywords[keywords.is_numeric].index)
621621
num_keys += ["*recnum*", "*sunum*", "*size*"]
622622
if skip_conversion is None:
623623
skip_conversion = []
@@ -630,11 +630,12 @@ def _convert_numeric_keywords(self, ds, kdf, *, skip_conversion=None):
630630
# we need a special treatment for integer strings that start
631631
# with '0x', like QUALITY. The following to_numeric call is
632632
# still necessary as the results are still Python objects.
633-
if k in int_keys and kdf[k].dtype is np.dtype(object):
634-
idx = kdf[k].str.startswith("0x")
633+
if k in int_keys and (is_object_dtype(kdf[k]) or is_string_dtype(kdf[k])):
634+
values = kdf[k].astype(str)
635+
idx = values.str.startswith(("0x", "0X"))
635636
if idx.any():
636-
k_idx = kdf.columns.get_loc(k)
637-
kdf.loc[idx, kdf.columns[k_idx]] = kdf.loc[idx, kdf.columns[k_idx]].apply(int, base=16)
637+
kdf[k] = kdf[k].astype(object)
638+
kdf.loc[idx, k] = values[idx].apply(int, base=16)
638639
if k in num_keys:
639640
kdf[k] = _pd_to_numeric_coerce(kdf[k])
640641

@@ -1029,7 +1030,7 @@ def query(
10291030
else:
10301031
res_key = pd.DataFrame()
10311032
if convert_numeric:
1032-
self._convert_numeric_keywords(ds, res_key, skip_conversion=skip_conversion)
1033+
self._convert_numeric_keywords(self.info(ds).keywords, res_key, skip_conversion=skip_conversion)
10331034
res.append(res_key)
10341035

10351036
if seg is not None:

drms/tests/test_jsoc_query.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import pandas as pd
12
import pytest
23

34
import drms
@@ -100,9 +101,20 @@ def test_query_invalid_series(jsoc_client):
100101
[
101102
"hmi.v_45s[2014.01.01_00:00:35_TAI-2014.01.01_01:00:35_TAI]",
102103
"hmi.M_720s[2011.04.14_00:30:00_TAI/6h@2h]",
104+
"aia.lev1_euv_12s[2014-01-01T00:00:01Z/365d@1d][335]",
103105
],
104106
)
105107
def test_query_hexadecimal_strings(query):
106108
# Exercise the part of client.py that deals with hexadecimal strings
107109
c = drms.Client()
108-
c.query(query, key="**ALL**")
110+
result = c.query(query, key=["T_REC", "QUALITY", "CRPIX1", "CRVAL1", "BUNIT"])
111+
assert pd.api.types.is_integer_dtype(result["QUALITY"])
112+
113+
114+
def test_query_quality_hex_decimal_conversion():
115+
c = drms.Client()
116+
keywords = pd.DataFrame({"is_integer": [True], "is_numeric": [True]}, index=["QUALITY"])
117+
df = pd.DataFrame({"QUALITY": pd.Series(["0x00000000", "0x0000000A", "0X000000FF"], dtype="string")})
118+
c._convert_numeric_keywords(keywords, df)
119+
assert df["QUALITY"].tolist() == [0, 10, 255]
120+
assert pd.api.types.is_integer_dtype(df["QUALITY"])

drms/tests/test_to_datetime.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ def test_time_series(time_series, expected):
8282
("2010.05.01_TAI", False),
8383
("2010.05.01_00:00_TAI", False),
8484
("", True),
85-
("1600", True),
8685
("foo", True),
8786
("2013.12.21_23:32:34_TAI", False),
8887
]

examples/plot_aia_lightcurve.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@
4040
print("Querying series info...")
4141
series_info = client.info("aia.lev1_euv_12s")
4242
series_info_lev1 = client.info("aia.lev1")
43+
linkinfo = series_info.keywords.linkinfo
44+
missing_linkinfo = linkinfo.isna()
4345
for key in keys:
44-
linkinfo = series_info.keywords.loc[key].linkinfo
45-
if linkinfo is not None and linkinfo.startswith("lev1->"):
46+
if not missing_linkinfo.loc[key] and linkinfo.loc[key].startswith("lev1->"):
4647
note_str = series_info_lev1.keywords.loc[key].note
4748
else:
4849
note_str = series_info.keywords.loc[key].note

0 commit comments

Comments
 (0)