From d85c420749fa7f95c897e4ee017c780e48ead831 Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Thu, 25 Jun 2026 01:58:52 +0200 Subject: [PATCH] fix: %j (day of year) format directive ignored month/day in parse_with_formats When a user-supplied date format contained %j (day of year), parse_with_formats incorrectly treated the parsed date as having no month and no day. This caused it to overwrite the correctly-parsed month and day with the current-date defaults instead of preserving what strptime extracted. Root cause: missing_day was computed as '"%d" not in date_format', which does not recognize %j (or %-j) as a day-of-year directive, and missing_month only checked for %m, %b and %B, so a %j-only format was reported as lacking both month and day. A successful strptime with %j always populates both the day and month fields (e.g. day 100 of 2023 = April 10). The pre-existing _get_missing_parts helper already listed %j in the day directive mapping but not in month. Fix: add %j and %-j to the month directive mapping in _get_missing_parts, and replace the ad-hoc missing_month/missing_day checks in parse_with_formats with _get_missing_parts, which is the authoritative source for this logic. 
Before: dateparser.parse("2023-100", date_formats=["%Y-%j"]) -> datetime(2023, 6, 25) # wrong: June 25 = today After: dateparser.parse("2023-100", date_formats=["%Y-%j"]) -> datetime(2023, 4, 10) # correct: day 100 of 2023 --- dateparser/date.py | 6 ++++-- dateparser/utils/__init__.py | 4 +++- tests/test_date.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/dateparser/date.py b/dateparser/date.py index 094591707..cef23338b 100644 --- a/dateparser/date.py +++ b/dateparser/date.py @@ -14,6 +14,7 @@ from dateparser.parser import _parse_absolute, _parse_nospaces from dateparser.timezone_parser import pop_tz_offset_from_string from dateparser.utils import ( + _get_missing_parts, apply_timezone_from_settings, get_timezone_from_tz_string, set_correct_day_from_settings, @@ -187,8 +188,9 @@ def parse_with_formats(date_string, date_formats, settings): except ValueError: continue else: - missing_month = not any(m in date_format for m in ["%m", "%b", "%B"]) - missing_day = "%d" not in date_format + _missing = _get_missing_parts(date_format) + missing_month = "month" in _missing + missing_day = "day" in _missing if missing_month and missing_day: period = "year" date_obj = set_correct_month_from_settings(date_obj, settings) diff --git a/dateparser/utils/__init__.py b/dateparser/utils/__init__.py index 023c5fbb3..ccbe9eac5 100644 --- a/dateparser/utils/__init__.py +++ b/dateparser/utils/__init__.py @@ -57,7 +57,9 @@ def _get_missing_parts(fmt): """ directive_mapping = { "day": ["%d", "%-d", "%j", "%-j"], - "month": ["%b", "%B", "%m", "%-m"], + # %j (day of year) encodes month implicitly: a successful strptime with %j always + # populates the month field, so %j should count as providing month information. 
+ "month": ["%b", "%B", "%m", "%-m", "%j", "%-j"], "year": ["%y", "%-y", "%Y"], } diff --git a/tests/test_date.py b/tests/test_date.py index 7c8759b89..d2bd2987e 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -374,6 +374,34 @@ def test_should_parse_date(self, date_string, date_formats, expected_result): self.then_parsed_period_is("day") self.then_parsed_date_is(expected_result) + @parameterized.expand( + [ + param( + date_string="2023-100", + date_formats=["%Y-%j"], + expected_result=datetime(2023, 4, 10), + ), + param( + date_string="2024-060", + date_formats=["%Y-%j"], + expected_result=datetime(2024, 2, 29), + ), + param( + date_string="2023 060", + date_formats=["%Y %j"], + expected_result=datetime(2023, 3, 1), + ), + ] + ) + def test_should_parse_day_of_year_format( + self, date_string, date_formats, expected_result + ): + """Format %%j (day of year) should correctly set both month and day.""" + self.when_date_is_parsed_with_formats(date_string, date_formats) + self.then_date_was_parsed() + self.then_parsed_period_is("day") + self.then_parsed_date_is(expected_result) + @parameterized.expand( [ param(