Skip to content

Commit 379b9d3

Browse files
sergio-sisternes-epamSergio SisternesCopilot
authored
fix: preserve DependencyReference through download pipeline (#382) (#383)
* fix: preserve DependencyReference through download pipeline (#382) Subdirectory packages on non-GitHub/ADO hosts (e.g. GitLab, Gitea) failed because the download pipeline converted DependencyReference to a string via str() and re-parsed it in download_package(). The str() → parse() round-trip lost the repo_url / virtual_path boundary on generic hosts where _detect_virtual_package() uses a dynamic min_base_segments heuristic. Fix: pass the structured DependencyReference object through the pipeline instead of flattening to string. Specifically: - download_package() and resolve_git_reference() now accept Union[str, DependencyReference], skipping the parse when already structured - build_download_ref() returns a DependencyReference (via dataclasses.replace) instead of a flat string - All call sites in install.py and _utils.py updated to pass the object directly Closes #382 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix: address Copilot review feedback - Normalize original_ref to string in resolve_git_reference() to avoid leaking DependencyReference objects into ResolvedReference.original_ref - Replace shallow test with one that verifies DependencyReference.parse() is NOT called when passing a structured object to download_package() - Update test_resolve_git_reference_branch to match normalized original_ref Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Sergio Sisternes <sergio.sisternes@epam.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 6457eac commit 379b9d3

7 files changed

Lines changed: 176 additions & 74 deletions

File tree

src/apm_cli/commands/deps/_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def _update_single_package(package_name: str, project_deps: List, apm_modules_pa
242242
_rich_info(f"Updating {target_dep.repo_url}...")
243243

244244
# Download latest version
245-
package_info = downloader.download_package(str(target_dep), package_dir)
245+
package_info = downloader.download_package(target_dep, package_dir)
246246

247247
_rich_success(f"[+] Updated {target_dep.repo_url}")
248248

@@ -285,7 +285,7 @@ def _update_all_packages(project_deps: List, apm_modules_path: Path):
285285

286286
try:
287287
_rich_info(f" Updating {dep.repo_url}...")
288-
package_info = downloader.download_package(str(dep), package_dir)
288+
package_info = downloader.download_package(dep, package_dir)
289289
updated_count += 1
290290
_rich_success(f" [+] {dep.repo_url}")
291291

src/apm_cli/commands/install.py

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -928,31 +928,23 @@ def download_callback(dep_ref, modules_dir):
928928
return result_path
929929
return None
930930

931-
# Build repo_ref string - include host for GHE/ADO/Artifactory, plus reference if specified
932-
repo_ref = dep_ref.repo_url
933-
if dep_ref.host and dep_ref.host not in ("github.com", None):
934-
if dep_ref.artifactory_prefix:
935-
repo_ref = f"{dep_ref.host}/{dep_ref.artifactory_prefix}/{dep_ref.repo_url}"
936-
else:
937-
repo_ref = f"{dep_ref.host}/{dep_ref.repo_url}"
938-
if dep_ref.virtual_path:
939-
repo_ref = f"{repo_ref}/{dep_ref.virtual_path}"
940-
941931
# T5: Use locked commit if available (reproducible installs)
942932
locked_ref = None
943933
if existing_lockfile:
944934
locked_dep = existing_lockfile.get_dependency(dep_ref.get_unique_key())
945935
if locked_dep and locked_dep.resolved_commit and locked_dep.resolved_commit != "cached":
946936
locked_ref = locked_dep.resolved_commit
947937

948-
# Priority: locked commit > explicit reference > default branch
938+
# Build a DependencyReference with the right ref to avoid lossy
939+
# str() → parse() round-trips (#382).
940+
from dataclasses import replace as _dc_replace
949941
if locked_ref:
950-
repo_ref = f"{repo_ref}#{locked_ref}"
951-
elif dep_ref.reference:
952-
repo_ref = f"{repo_ref}#{dep_ref.reference}"
942+
download_dep = _dc_replace(dep_ref, reference=locked_ref)
943+
else:
944+
download_dep = dep_ref
953945

954946
# Silent download - no progress display for transitive deps
955-
result = downloader.download_package(repo_ref, install_path)
947+
result = downloader.download_package(download_dep, install_path)
956948
# Capture resolved commit SHA for lockfile
957949
resolved_sha = None
958950
if result and hasattr(result, 'resolved_reference') and result.resolved_reference:
@@ -1367,9 +1359,7 @@ def _collect_descendants(node, visited=None):
13671359
resolved_ref = None
13681360
if dep_ref.reference and dep_ref.get_unique_key() not in _pre_downloaded_keys:
13691361
try:
1370-
resolved_ref = downloader.resolve_git_reference(
1371-
f"{dep_ref.repo_url}@{dep_ref.reference}"
1372-
)
1362+
resolved_ref = downloader.resolve_git_reference(dep_ref)
13731363
except Exception:
13741364
pass # If resolution fails, skip cache (fetch latest)
13751365

src/apm_cli/deps/github_downloader.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from typing import Optional, Dict, Any, Callable
1212
import random
1313
import re
14+
from typing import Union
1415
import requests
1516

1617
import git
@@ -648,11 +649,13 @@ def _clone_with_fallback(self, repo_url_base: str, target_path: Path, progress_r
648649

649650
raise RuntimeError(error_msg)
650651

651-
def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
652+
def resolve_git_reference(self, repo_ref: Union[str, "DependencyReference"]) -> ResolvedReference:
652653
"""Resolve a Git reference (branch/tag/commit) to a specific commit SHA.
653654
654655
Args:
655-
repo_ref: Repository reference string (e.g., "user/repo#branch")
656+
repo_ref: Repository reference — either a DependencyReference object
657+
or a string (e.g., "user/repo#branch"). Passing the object
658+
directly avoids a lossy parse round-trip for generic git hosts.
656659
657660
Returns:
658661
ResolvedReference: Resolved reference with commit SHA
@@ -661,23 +664,29 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
661664
ValueError: If the reference format is invalid
662665
RuntimeError: If Git operations fail
663666
"""
664-
# Parse the repository reference
665-
try:
666-
dep_ref = DependencyReference.parse(repo_ref)
667-
except ValueError as e:
668-
raise ValueError(f"Invalid repository reference '{repo_ref}': {e}")
667+
# Accept both string and DependencyReference to avoid lossy round-trips
668+
if isinstance(repo_ref, DependencyReference):
669+
dep_ref = repo_ref
670+
else:
671+
try:
672+
dep_ref = DependencyReference.parse(repo_ref)
673+
except ValueError as e:
674+
raise ValueError(f"Invalid repository reference '{repo_ref}': {e}")
669675

670676
# Default to main branch if no reference specified
671677
ref = dep_ref.reference or "main"
672678

679+
# Normalize to string for ResolvedReference.original_ref
680+
original_ref_str = str(dep_ref)
681+
673682
# Artifactory: no git repo to query, return ref-based resolution
674683
if dep_ref.is_artifactory() or (
675684
self._parse_artifactory_base_url()
676685
and self._should_use_artifactory_proxy(dep_ref)
677686
):
678687
is_commit = re.match(r'^[a-f0-9]{7,40}$', ref.lower()) is not None
679688
return ResolvedReference(
680-
original_ref=repo_ref,
689+
original_ref=original_ref_str,
681690
ref_type=GitReferenceType.COMMIT if is_commit else GitReferenceType.BRANCH,
682691
resolved_commit=None,
683692
ref_name=ref
@@ -765,7 +774,7 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
765774
shutil.rmtree(temp_dir, ignore_errors=True)
766775

767776
return ResolvedReference(
768-
original_ref=repo_ref,
777+
original_ref=original_ref_str,
769778
ref_type=ref_type,
770779
resolved_commit=resolved_commit,
771780
ref_name=ref_name
@@ -1767,7 +1776,7 @@ def _download_package_from_artifactory(
17671776

17681777
def download_package(
17691778
self,
1770-
repo_ref: str,
1779+
repo_ref: Union[str, "DependencyReference"],
17711780
target_path: Path,
17721781
progress_task_id=None,
17731782
progress_obj=None
@@ -1778,7 +1787,9 @@ def download_package(
17781787
package structure instead of cloning the full repository.
17791788
17801789
Args:
1781-
repo_ref: Repository reference string (e.g., "user/repo#branch" or "user/repo/path/file.prompt.md")
1790+
repo_ref: Repository reference — either a DependencyReference object
1791+
or a string (e.g., "user/repo#branch"). Passing the object
1792+
directly avoids a lossy parse round-trip for generic git hosts.
17821793
target_path: Local path where package should be downloaded
17831794
progress_task_id: Rich Progress task ID for progress updates
17841795
progress_obj: Rich Progress object for progress updates
@@ -1790,11 +1801,14 @@ def download_package(
17901801
ValueError: If the repository reference is invalid
17911802
RuntimeError: If download or validation fails
17921803
"""
1793-
# Parse the repository reference
1794-
try:
1795-
dep_ref = DependencyReference.parse(repo_ref)
1796-
except ValueError as e:
1797-
raise ValueError(f"Invalid repository reference '{repo_ref}': {e}")
1804+
# Accept both string and DependencyReference to avoid lossy round-trips
1805+
if isinstance(repo_ref, DependencyReference):
1806+
dep_ref = repo_ref
1807+
else:
1808+
try:
1809+
dep_ref = DependencyReference.parse(repo_ref)
1810+
except ValueError as e:
1811+
raise ValueError(f"Invalid repository reference '{repo_ref}': {e}")
17981812

17991813
# Handle virtual packages differently
18001814
if dep_ref.is_virtual:
@@ -1829,12 +1843,12 @@ def download_package(
18291843
# When ARTIFACTORY_ONLY is set but no Artifactory proxy matched, block direct git
18301844
if self._is_artifactory_only():
18311845
raise RuntimeError(
1832-
f"ARTIFACTORY_ONLY is set but no Artifactory proxy is configured for '{repo_ref}'. "
1846+
f"ARTIFACTORY_ONLY is set but no Artifactory proxy is configured for '{dep_ref}'. "
18331847
"Set ARTIFACTORY_BASE_URL or use explicit Artifactory FQDN syntax."
18341848
)
18351849

18361850
# Regular package download (existing logic)
1837-
resolved_ref = self.resolve_git_reference(repo_ref)
1851+
resolved_ref = self.resolve_git_reference(dep_ref)
18381852

18391853
# Create target directory if it doesn't exist
18401854
target_path.mkdir(parents=True, exist_ok=True)

src/apm_cli/drift.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from __future__ import annotations
4545

4646
import builtins
47+
from dataclasses import replace as _dataclass_replace
4748
from pathlib import Path
4849
from typing import TYPE_CHECKING, Dict, List, Optional, Set
4950

@@ -171,8 +172,13 @@ def build_download_ref(
171172
*,
172173
update_refs: bool,
173174
ref_changed: bool,
174-
) -> str:
175-
"""Build the download-ref string passed to the package downloader.
175+
) -> "DependencyReference":
176+
"""Build the dependency reference passed to the package downloader.
177+
178+
Returns a :class:`DependencyReference` (not a flat string) so that
179+
structured fields like ``virtual_path`` survive the trip to
180+
``download_package()`` without a lossy ``str()`` → ``parse()``
181+
round-trip. See :issue:`382`.
176182
177183
Uses the locked commit SHA for reproducibility, unless:
178184
* ``update_refs=True`` — intentional update run; use the manifest ref.
@@ -186,20 +192,11 @@ def build_download_ref(
186192
this dependency.
187193
188194
Returns:
189-
A ref string suitable for ``GitHubPackageDownloader.download_package``.
195+
A :class:`DependencyReference` suitable for
196+
``GitHubPackageDownloader.download_package``.
190197
"""
191-
download_ref = str(dep_ref)
192198
if existing_lockfile and not update_refs and not ref_changed:
193199
locked_dep = existing_lockfile.get_dependency(dep_ref.get_unique_key())
194200
if locked_dep and locked_dep.resolved_commit and locked_dep.resolved_commit != "cached":
195-
# Include the host so the downloader can resolve the correct
196-
# server (e.g. GitHub Enterprise custom domains). Without it
197-
# ``DependencyReference.parse()`` would fall back to github.com.
198-
if dep_ref.host:
199-
base_ref = f"{dep_ref.host}/{dep_ref.repo_url}"
200-
else:
201-
base_ref = dep_ref.repo_url
202-
if dep_ref.virtual_path:
203-
base_ref = f"{base_ref}/{dep_ref.virtual_path}"
204-
download_ref = f"{base_ref}#{locked_dep.resolved_commit}"
205-
return download_ref
201+
return _dataclass_replace(dep_ref, reference=locked_dep.resolved_commit)
202+
return dep_ref

tests/test_apm_package_models.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1354,4 +1354,96 @@ def test_package_dataclass_with_type(self):
13541354
def test_package_dataclass_type_defaults_to_none(self):
13551355
"""Test that APMPackage type defaults to None when not provided."""
13561356
package = APMPackage(name="test", version="1.0.0")
1357-
assert package.type is None
1357+
assert package.type is None
1358+
1359+
1360+
class TestGenericHostSubdirectoryRoundTrip:
1361+
"""Regression tests for issue #382: subdirectory packages on generic git hosts.
1362+
1363+
The str() → parse() round-trip must preserve virtual_path for all hosts,
1364+
not just GitHub and ADO.
1365+
"""
1366+
1367+
@pytest.mark.parametrize("git_url,path,ref,desc", [
1368+
("https://git.example.com/ai/grandpa-s-skills", "dist/brain-council", "master", "reporter case"),
1369+
("https://gitlab.com/my-org/my-group/my-skills", "dist/skill-a", "main", "GitLab nested groups"),
1370+
("https://gitea.example.com/org/repo", "prompts/helper", "v1.0", "Gitea simple"),
1371+
("https://bitbucket.example.com/team/prompts", "agents/summarizer", "develop", "Bitbucket self-hosted"),
1372+
])
1373+
def test_parse_from_dict_preserves_virtual_path(self, git_url, path, ref, desc):
1374+
"""parse_from_dict correctly separates repo URL from subdirectory path."""
1375+
entry = {"git": git_url, "path": path, "ref": ref}
1376+
dep = DependencyReference.parse_from_dict(entry)
1377+
assert dep.virtual_path == path, f"Failed for {desc}"
1378+
assert dep.is_virtual is True, f"Failed for {desc}"
1379+
1380+
@pytest.mark.parametrize("git_url,path,ref", [
1381+
("https://git.example.com/ai/grandpa-s-skills", "dist/brain-council", "master"),
1382+
("https://gitlab.com/org/repo", "prompts/helper", "v1.0"),
1383+
("https://bitbucket.example.com/team/prompts", "agents/summarizer", "develop"),
1384+
])
1385+
def test_download_package_skips_parse_with_structured_dep(self, git_url, path, ref):
1386+
"""download_package must skip DependencyReference.parse() when given
1387+
a structured object, avoiding the lossy round-trip."""
1388+
entry = {"git": git_url, "path": path, "ref": ref}
1389+
dep = DependencyReference.parse_from_dict(entry)
1390+
1391+
from apm_cli.deps.github_downloader import GitHubPackageDownloader
1392+
downloader = GitHubPackageDownloader()
1393+
1394+
# Monkey-patch DependencyReference.parse to detect if it's called
1395+
original_parse = DependencyReference.parse
1396+
parse_called = False
1397+
@classmethod
1398+
def tracking_parse(cls, s):
1399+
nonlocal parse_called
1400+
parse_called = True
1401+
return original_parse(s)
1402+
1403+
DependencyReference.parse = tracking_parse
1404+
try:
1405+
# download_package will fail on the actual clone, but the important
1406+
# thing is that it does NOT call parse() when given an object
1407+
downloader.download_package(dep, Path("/tmp/apm-test-nonexistent"))
1408+
except Exception:
1409+
pass # Expected: clone will fail, but parse should not be called
1410+
finally:
1411+
DependencyReference.parse = original_parse
1412+
1413+
assert not parse_called, (
1414+
"DependencyReference.parse() was called when passing a structured "
1415+
"DependencyReference — the lossy round-trip was NOT avoided"
1416+
)
1417+
1418+
def test_github_round_trip_works(self):
1419+
"""GitHub round-trip works because min_base_segments=2 is hardcoded."""
1420+
entry = {"git": "https://github.com/anthropics/skills", "path": "skills/skill-creator", "ref": "main"}
1421+
dep = DependencyReference.parse_from_dict(entry)
1422+
dep2 = DependencyReference.parse(str(dep))
1423+
assert dep2.virtual_path == dep.virtual_path
1424+
assert dep2.is_virtual == dep.is_virtual
1425+
1426+
def test_build_download_ref_preserves_virtual_path(self):
1427+
"""build_download_ref returns a DependencyReference that preserves
1428+
virtual_path for generic hosts (not a lossy flat string)."""
1429+
from apm_cli.drift import build_download_ref
1430+
from unittest.mock import Mock
1431+
1432+
dep = DependencyReference(
1433+
repo_url="org/my-skills",
1434+
host="git.example.com",
1435+
reference="main",
1436+
virtual_path="dist/brain-council",
1437+
is_virtual=True,
1438+
)
1439+
lockfile = Mock()
1440+
locked_dep = Mock()
1441+
locked_dep.resolved_commit = "abc123"
1442+
lockfile.get_dependency = Mock(return_value=locked_dep)
1443+
1444+
result = build_download_ref(dep, lockfile, update_refs=False, ref_changed=False)
1445+
assert result.virtual_path == "dist/brain-council"
1446+
assert result.repo_url == "org/my-skills"
1447+
assert result.host == "git.example.com"
1448+
assert result.reference == "abc123"
1449+
assert result.is_virtual is True

tests/test_github_downloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def test_resolve_git_reference_branch(self, mock_mkdtemp, mock_repo_class):
8888
result = self.downloader.resolve_git_reference('user/repo#main')
8989

9090
assert isinstance(result, ResolvedReference)
91-
assert result.original_ref == 'user/repo#main'
91+
assert result.original_ref == 'github.com/user/repo#main'
9292
assert result.ref_type == GitReferenceType.BRANCH
9393
assert result.resolved_commit == 'abc123def456'
9494
assert result.ref_name == 'main'

0 commit comments

Comments
 (0)