Skip to content

Commit dd26abf

Browse files
hughdbrown and claude authored
Bug mime date parse (#50)
The original problem was a failing test, described as: > This commit fixes a bug in MIME date parsing related to named timezones (like "MST"). The issue is that Go's `time.Parse` handles named timezone abbreviations inconsistently across platforms: on some systems the offset is known, on others it is not. The fix detects whether a date string contains a numeric offset (e.g., `+0700`, `-05:00`) or `Z` (UTC), and handles conversion differently in each case: when a numeric offset or `Z` is present, the parsed time is converted to UTC normally; when only a named timezone is present, the wall-clock values are kept unchanged and simply marked as UTC. However, the fix needed an additional fix: > When parseDate falls back to parsing the original string (with parenthesized timezone), it was using the numericOffset computed from baseStr instead of the original string. This caused incorrect UTC conversion for dates like "Mon, 02 Jan 2006 15:04:05 +0700 (UTC)" where the baseStr has no offset but the original string does. --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 9b2b0f8 commit dd26abf

File tree

2 files changed

+136
-7
lines changed

2 files changed

+136
-7
lines changed

internal/mime/parse.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,35 @@ var dateFormats = []string{
221221
"2006-01-02 15:04:05", // SQL-like without TZ
222222
}
223223

224+
// numericOffsetRe recognizes a signed, four-digit timezone offset with an
// optional colon between hours and minutes: +0000, -0700, +00:00, -07:00.
// The pattern is unanchored, so it matches an offset anywhere in the input.
var numericOffsetRe = regexp.MustCompile(`[+-]\d{2}:?\d{2}`)

// hasNumericOffset reports whether s states its UTC offset explicitly, either
// as a trailing "Z" or as a signed numeric offset somewhere in the string.
//
// It returns false for strings that carry only a named zone abbreviation
// (such as "MST") or no zone at all; callers use that to decide whether the
// offset produced by time.Parse can be trusted.
func hasNumericOffset(s string) bool {
	return strings.HasSuffix(s, "Z") || numericOffsetRe.MatchString(s)
}
236+
237+
// toUTC returns t expressed in UTC.
//
// When numericOffset is true the offset attached to t is trusted and the
// result is the same instant, shifted into UTC. When it is false (the source
// string had only a named zone, or no zone), the offset is considered
// unreliable: the year, month, day, clock and nanosecond fields of t are
// reused unchanged and merely relabelled as UTC, which may denote a different
// instant than t.
func toUTC(t time.Time, numericOffset bool) time.Time {
	if !numericOffset {
		// Rebuild from wall-clock fields so the zone attached to t is ignored.
		year, month, day := t.Date()
		hour, minute, second := t.Clock()
		return time.Date(year, month, day, hour, minute, second, t.Nanosecond(), time.UTC)
	}
	return t.UTC()
}
248+
224249
// parseDate attempts to parse a date string in various formats.
225250
// Returns the time in UTC for consistent storage.
251+
// Named timezones (like "MST") are treated as UTC since their offsets
252+
// can't be reliably determined across platforms.
226253
func parseDate(s string) (time.Time, error) {
227254
// Normalize whitespace efficiently: split on whitespace runs and rejoin
228255
s = strings.Join(strings.Fields(s), " ")
@@ -234,18 +261,23 @@ func parseDate(s string) (time.Time, error) {
234261
baseStr = strings.TrimSpace(s[:idx])
235262
}
236263

264+
// Check if we have a numeric offset for proper UTC conversion
265+
numericOffset := hasNumericOffset(baseStr)
266+
237267
// Try parsing with base string (parenthesized TZ stripped)
238268
for _, format := range dateFormats {
239269
if t, err := time.Parse(format, baseStr); err == nil {
240-
return t.UTC(), nil
270+
return toUTC(t, numericOffset), nil
241271
}
242272
}
243273

244274
// Try original string (some formats expect the parenthesized part)
245275
if baseStr != s {
246276
for _, format := range dateFormats {
247277
if t, err := time.Parse(format, s); err == nil {
248-
return t.UTC(), nil
278+
// Recompute numericOffset for the original string since it may
279+
// have a different offset than baseStr (e.g., "+0700 (UTC)")
280+
return toUTC(t, hasNumericOffset(s)), nil
249281
}
250282
}
251283
}

internal/mime/parse_test.go

Lines changed: 102 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,85 @@ func TestParseReferences(t *testing.T) {
100100
}
101101
}
102102

103+
func TestHasNumericOffset(t *testing.T) {
104+
tests := []struct {
105+
input string
106+
want bool
107+
}{
108+
// Numeric offsets should return true
109+
{"+0700", true},
110+
{"-0700", true},
111+
{"+07:00", true},
112+
{"-07:00", true},
113+
{"Mon, 02 Jan 2006 15:04:05 -0700", true},
114+
{"Mon, 02 Jan 2006 15:04:05 +0000", true},
115+
{"2006-01-02T15:04:05-07:00", true},
116+
{"2006-01-02T15:04:05+00:00", true},
117+
118+
// Z suffix (UTC) should return true
119+
{"2006-01-02T15:04:05Z", true},
120+
{"Z", true},
121+
122+
// Named timezones should return false
123+
{"MST", false},
124+
{"Mon, 02 Jan 2006 15:04:05 MST", false},
125+
{"Mon, 02 Jan 2006 15:04:05 PST", false},
126+
{"Mon Jan 2 15:04:05 MST 2006", false},
127+
128+
// Mixed: numeric offset with parenthesized named TZ should return true
129+
{"Mon, 02 Jan 2006 15:04:05 -0700 (PST)", true},
130+
{"Mon, 02 Jan 2006 15:04:05 +0700 (UTC)", true},
131+
132+
// Empty and no timezone
133+
{"", false},
134+
{"Mon, 02 Jan 2006 15:04:05", false},
135+
}
136+
137+
for _, tc := range tests {
138+
t.Run(tc.input, func(t *testing.T) {
139+
got := hasNumericOffset(tc.input)
140+
if got != tc.want {
141+
t.Errorf("hasNumericOffset(%q) = %v, want %v", tc.input, got, tc.want)
142+
}
143+
})
144+
}
145+
}
146+
147+
func TestToUTC(t *testing.T) {
148+
// Test with numeric offset: should perform proper timezone conversion
149+
t.Run("numeric offset converts to UTC", func(t *testing.T) {
150+
// Create a time at 15:04:05 in -0700 timezone
151+
loc := time.FixedZone("test", -7*60*60)
152+
input := time.Date(2006, 1, 2, 15, 4, 5, 0, loc)
153+
154+
got := toUTC(input, true)
155+
156+
// Should be 22:04:05 UTC (15:04:05 + 7 hours)
157+
want := time.Date(2006, 1, 2, 22, 4, 5, 0, time.UTC)
158+
if !got.Equal(want) {
159+
t.Errorf("toUTC() with numeric offset = %v, want %v", got, want)
160+
}
161+
})
162+
163+
// Test with named timezone: should keep same time values but mark as UTC
164+
t.Run("named timezone keeps same values as UTC", func(t *testing.T) {
165+
// Create a time at 15:04:05 with some location
166+
loc := time.FixedZone("test", -7*60*60)
167+
input := time.Date(2006, 1, 2, 15, 4, 5, 0, loc)
168+
169+
got := toUTC(input, false)
170+
171+
// Should be 15:04:05 UTC (same wall-clock time, different instant)
172+
want := time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)
173+
if !got.Equal(want) {
174+
t.Errorf("toUTC() with named timezone = %v, want %v", got, want)
175+
}
176+
if got.Location() != time.UTC {
177+
t.Errorf("toUTC() location = %v, want UTC", got.Location())
178+
}
179+
})
180+
}
181+
103182
func TestParseDate(t *testing.T) {
104183
// parseDate returns zero time (not error) for unparseable dates.
105184
// This is intentional - malformed dates are common in email and
@@ -110,26 +189,44 @@ func TestParseDate(t *testing.T) {
110189
input string
111190
want time.Time // Zero value means we expect parse failure
112191
}{
113-
// Valid RFC date formats
192+
// Valid RFC date formats with numeric offsets
114193
{"RFC1123Z", "Mon, 02 Jan 2006 15:04:05 -0700",
115194
time.Date(2006, 1, 2, 22, 4, 5, 0, time.UTC)},
116-
{"RFC1123 named zone", "Mon, 2 Jan 2006 15:04:05 MST",
117-
time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)}, // MST treated as UTC offset 0 by Go
118195
{"no weekday", "02 Jan 2006 15:04:05 -0700",
119196
time.Date(2006, 1, 2, 22, 4, 5, 0, time.UTC)},
120197
{"parenthesized zone", "Mon, 02 Jan 2006 15:04:05 -0700 (PST)",
121198
time.Date(2006, 1, 2, 22, 4, 5, 0, time.UTC)},
122199
{"double space after comma", "Mon, 2 Dec 2024 11:42:03 +0000 (UTC)",
123200
time.Date(2024, 12, 2, 11, 42, 3, 0, time.UTC)},
124-
{"ISO 8601 UTC", "2006-01-02T15:04:05Z",
125-
time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)},
126201
{"ISO 8601 offset", "2006-01-02T15:04:05-07:00",
127202
time.Date(2006, 1, 2, 22, 4, 5, 0, time.UTC)},
128203
{"SQL-like with tz", "2006-01-02 15:04:05 -0700",
129204
time.Date(2006, 1, 2, 22, 4, 5, 0, time.UTC)},
205+
206+
// Z suffix (UTC) - should work like numeric offset
207+
{"ISO 8601 UTC", "2006-01-02T15:04:05Z",
208+
time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)},
209+
210+
// Named timezone handling - time values kept as-is, marked UTC
211+
// Named TZ offsets are platform-dependent, so we treat them as UTC
212+
{"RFC1123 named zone MST", "Mon, 2 Jan 2006 15:04:05 MST",
213+
time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)},
214+
{"single-digit day named zone", "Mon, 2 Jan 2006 15:04:05 PST",
215+
time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)},
216+
{"no weekday named zone", "02 Jan 2006 15:04:05 EST",
217+
time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)},
218+
{"UnixDate format", "Mon Jan 2 15:04:05 MST 2006",
219+
time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)},
220+
221+
// No timezone at all - treated like named TZ (no offset to convert)
130222
{"SQL-like no tz", "2006-01-02 15:04:05",
131223
time.Date(2006, 1, 2, 15, 4, 5, 0, time.UTC)},
132224

225+
// Edge case: parenthesized TZ differs from main TZ
226+
// The numeric offset (+0700) should be used for conversion
227+
{"numeric offset with different paren TZ", "Mon, 02 Jan 2006 15:04:05 +0700 (UTC)",
228+
time.Date(2006, 1, 2, 8, 4, 5, 0, time.UTC)},
229+
133230
// Invalid/unparseable dates should return zero time
134231
{"empty", "", time.Time{}},
135232
{"garbage", "not a date", time.Time{}},

0 commit comments

Comments
 (0)