From 3cc047ada1f81adc598b0a11ae1dd61e0bb286c4 Mon Sep 17 00:00:00 2001
From: Yonghye Kwon <developer.0hye@gmail.com>
Date: Mon, 9 Mar 2026 14:59:22 +0900
Subject: [PATCH 1/6] feat: strip common prefix from ZIP archives

GitHub-style ZIP archives nest all files under a single directory
(e.g. `repo-main/`). Strip this prefix in both ZipMemoryFileSystem
(analysis) and materialize_zip_workspace (extraction) so entry
detection and asset resolution work correctly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
---
 crates/marknest-core/src/lib.rs           | 31 +++++++++
 crates/marknest-core/tests/analyze_zip.rs | 76 +++++++++++++++++++++++
 crates/marknest/src/lib.rs                | 47 ++++++++++++--
 3 files changed, 149 insertions(+), 5 deletions(-)

diff --git a/crates/marknest-core/src/lib.rs b/crates/marknest-core/src/lib.rs
index 635e992..a1c03fb 100644
--- a/crates/marknest-core/src/lib.rs
+++ b/crates/marknest-core/src/lib.rs
@@ -431,11 +431,42 @@ impl ZipMemoryFileSystem {
             });
         }
 
+        strip_common_prefix(&mut files);
         files.sort_by(|left, right| left.normalized_path.cmp(&right.normalized_path));
         Ok(Self { files })
     }
 }
 
+/// If every file shares the same first path segment (e.g. `repo-main/`),
+/// strip that segment from all paths. This handles GitHub-style archives
+/// that nest everything under `{repo}-{ref}/`. Paths are left untouched as
+/// soon as one file is not nested under that shared first segment.
+fn strip_common_prefix(files: &mut [IndexedFile]) {
+    // Wrapper candidate: text before the first '/' of the first file, if any.
+    let Some((common, _)) = files
+        .first()
+        .and_then(|file| file.normalized_path.split_once('/'))
+    else {
+        return;
+    };
+    let strip_len: usize = common.len() + 1; // include the '/'
+
+    // All files must start with the same first segment followed by '/'.
+    let all_share_prefix = files.iter().all(|file| {
+        file.normalized_path
+            .strip_prefix(common)
+            .is_some_and(|rest| rest.starts_with('/'))
+    });
+    if !all_share_prefix {
+        return;
+    }
+
+    // Drop the prefix in place instead of allocating a new String per file.
+    for file in files.iter_mut() {
+        file.normalized_path.replace_range(..strip_len, "");
+    }
+}
+
 impl IndexedFileSystem for ZipMemoryFileSystem {
     fn source_kind(&self) -> ProjectSourceKind {
         ProjectSourceKind::Zip
diff --git a/crates/marknest-core/tests/analyze_zip.rs b/crates/marknest-core/tests/analyze_zip.rs
index 3c60d64..943ab08 100644
--- a/crates/marknest-core/tests/analyze_zip.rs
+++ b/crates/marknest-core/tests/analyze_zip.rs
@@ -69,3 +69,79 @@ fn rejects_windows_drive_paths_inside_zip() {
         }
     );
 }
+
+#[test]
+fn strips_common_prefix_from_github_style_zip() {
+    let bytes = build_zip(&[
+        (
+            "repo-main/README.md",
+            "# Hello\n\n![Logo](./images/logo.png)\n",
+        ),
+        ("repo-main/images/logo.png", "fake-png-bytes"),
+    ]);
+
+    let index = analyze_zip(&bytes).expect("github-style zip should analyze");
+
+    assert_eq!(index.selected_entry.as_deref(), Some("README.md"));
+    assert_eq!(index.entry_selection_reason, EntrySelectionReason::Readme);
+
+    let candidate_paths: Vec<&str> = index
+        .entry_candidates
+        .iter()
+        .map(|candidate| candidate.path.as_str())
+        .collect();
+    assert_eq!(candidate_paths, vec!["README.md"]);
+
+    let resolved_asset_paths: Vec<Option<&str>> = index
+        .assets
+        .iter()
+        .map(|asset| asset.resolved_path.as_deref())
+        .collect();
+    assert_eq!(resolved_asset_paths, vec![Some("images/logo.png")]);
+}
+
+#[test]
+fn preserves_paths_when_no_common_prefix() {
+    let bytes = build_zip(&[
+        ("README.md", "# Root readme\n"),
+        ("docs/guide.md", "# Guide\n"),
+    ]);
+
+    let index = analyze_zip(&bytes).expect("zip without common prefix should analyze");
+
+    let candidate_paths: Vec<&str> = index
+        .entry_candidates
+        .iter()
+        .map(|candidate| candidate.path.as_str())
+        .collect();
+    assert!(candidate_paths.contains(&"README.md"));
+    assert!(candidate_paths.contains(&"docs/guide.md"));
+}
+
+#[test]
+fn preserves_paths_when_multiple_top_level_directories() {
+    let bytes = build_zip(&[("dir-a/README.md", "# A\n"), ("dir-b/README.md", "# B\n")]);
+
+    let index = analyze_zip(&bytes).expect("zip with multiple top dirs should analyze");
+
+    let candidate_paths: Vec<&str> = index
+        .entry_candidates
+        .iter()
+        .map(|candidate| candidate.path.as_str())
+        .collect();
+    assert!(candidate_paths.contains(&"dir-a/README.md"));
+    assert!(candidate_paths.contains(&"dir-b/README.md"));
+}
+
+#[test]
+fn strips_common_prefix_single_nested_file() {
+    let bytes = build_zip(&[("only-dir/file.md", "# Single\n")]);
+
+    let index = analyze_zip(&bytes).expect("single nested file zip should analyze");
+
+    assert_eq!(index.selected_entry.as_deref(), Some("file.md"));
+    assert_eq!(
+        index.entry_selection_reason,
+        EntrySelectionReason::SingleMarkdownFile
+    );
+}
diff --git a/crates/marknest/src/lib.rs b/crates/marknest/src/lib.rs
index 2f0031b..c184977 100644
--- a/crates/marknest/src/lib.rs
+++ b/crates/marknest/src/lib.rs
@@ -293,6 +293,8 @@ fn materialize_zip_workspace(zip_path: &Path) -> Result<TempDir, AppFailure> {
         ))
     })?;
 
+    // Collect all entries with normalized paths first to detect a common prefix
+    let mut collected_entries: Vec<(String, Vec<u8>)> = Vec::new();
     for index in 0..archive.len() {
         let mut entry = archive.by_index(index).map_err(|error| {
             AppFailure::validation(format!("Failed to read ZIP entry {index}: {error}"))
@@ -306,7 +308,21 @@ fn materialize_zip_workspace(zip_path: &Path) -> Result<TempDir, AppFailure> {
         let normalized_path = normalize_relative_string(&raw_path).map_err(|_| {
             AppFailure::validation(format!("Unsafe ZIP entry path detected: {raw_path}"))
         })?;
-        let output_path = normalized_path_to_filesystem_path(temp_dir.path(), &normalized_path);
+
+        let mut contents: Vec<u8> = Vec::new();
+        entry.read_to_end(&mut contents).map_err(|error| {
+            AppFailure::validation(format!("Failed to extract ZIP entry {raw_path}: {error}"))
+        })?;
+
+        collected_entries.push((normalized_path, contents));
+    }
+
+    // Strip common prefix (e.g. GitHub archive `repo-main/` wrapper)
+    let prefix_len = detect_common_prefix_len(&collected_entries);
+
+    for (normalized_path, contents) in &collected_entries {
+        let stripped_path = &normalized_path[prefix_len..];
+        let output_path = normalized_path_to_filesystem_path(temp_dir.path(), stripped_path);
 
         if let Some(parent) = output_path.parent() {
             fs::create_dir_all(parent).map_err(|error| {
@@ -317,10 +333,6 @@ fn materialize_zip_workspace(zip_path: &Path) -> Result<TempDir, AppFailure> {
             })?;
         }
 
-        let mut contents: Vec<u8> = Vec::new();
-        entry.read_to_end(&mut contents).map_err(|error| {
-            AppFailure::validation(format!("Failed to extract ZIP entry {raw_path}: {error}"))
-        })?;
         fs::write(&output_path, contents).map_err(|error| {
             AppFailure::system(format!(
                 "Failed to write the extracted ZIP entry {}: {error}",
@@ -332,6 +344,31 @@ fn materialize_zip_workspace(zip_path: &Path) -> Result<TempDir, AppFailure> {
     Ok(temp_dir)
 }
 
+/// Computes how many leading bytes (the first path segment plus its trailing
+/// `/`) every entry path has in common, or 0 when the entries do not all sit
+/// under one shared top-level directory.
+fn detect_common_prefix_len(entries: &[(String, Vec<u8>)]) -> usize {
+    // Nothing to strip from an empty archive.
+    let Some((first_path, _)) = entries.first() else {
+        return 0;
+    };
+
+    // Candidate wrapper directory: the first path segment of the first entry.
+    let candidate: &str = first_path.split('/').next().unwrap_or(first_path.as_str());
+    let separator_index: usize = candidate.len();
+
+    // Give up as soon as one entry is not of the form `candidate/...`.
+    for (path, _) in entries {
+        let nested = path.starts_with(candidate)
+            && path.as_bytes().get(separator_index) == Some(&b'/');
+        if !nested {
+            return 0;
+        }
+    }
+
+    separator_index + 1
+}
+
 fn run_single_convert(
     args: &ConvertArgs,
     analyzed_input: &AnalyzedInput,

From ccb820255fd294c935ba6a68389ed2f12d9ce1d1 Mon Sep 17 00:00:00 2001
From: Yonghye Kwon <developer.0hye@gmail.com>
Date: Mon, 9 Mar 2026 15:00:40 +0900
Subject: [PATCH 2/6] feat: add GitHub URL parser for CLI input

Parse GitHub URLs into owner, repo, ref, subpath, and blob/tree type.
Supports bare repo URLs, branch/tag refs, blob paths, and tree paths.
Handles .git suffix and http/https schemes. Rejects non-GitHub URLs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
---
 crates/marknest/src/lib.rs | 206 +++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)

diff --git a/crates/marknest/src/lib.rs b/crates/marknest/src/lib.rs
index c184977..7768a17 100644
--- a/crates/marknest/src/lib.rs
+++ b/crates/marknest/src/lib.rs
@@ -3650,6 +3650,88 @@ impl ParseFailure {
     }
 }
 
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct ParsedGitHubUrl {
+    owner: String, // first path segment after the host
+    repo: String, // second path segment, without a trailing `.git`
+    git_ref: Option<String>, // segment after /tree/ or /blob/; None for a bare repo URL
+    subpath: Option<String>, // segments after the ref joined with '/'; None when absent
+    is_file_reference: bool, // true for /blob/ URLs, false for /tree/ and bare repo URLs
+}
+
+/// Parse a GitHub URL into its components. Returns `None` for non-GitHub URLs
+/// or malformed input.
+///
+/// Accepts `http(s)://[www.]github.com/{owner}/{repo}`, optionally followed by
+/// `/tree/{ref}[/path]` or `/blob/{ref}[/path]`. Refs containing `/` are unsupported.
+fn parse_github_url(input: &str) -> Option<ParsedGitHubUrl> {
+    let trimmed: &str = input.trim();
+
+    // Must start with http:// or https://
+    let after_scheme: &str = trimmed
+        .strip_prefix("https://")
+        .or_else(|| trimmed.strip_prefix("http://"))?;
+
+    // Must be github.com host (with optional www.)
+    let after_host: &str = after_scheme
+        .strip_prefix("github.com/")
+        .or_else(|| after_scheme.strip_prefix("www.github.com/"))?;
+
+    // Split remaining path segments; empty ones (trailing or doubled '/') are dropped
+    let segments: Vec<&str> = after_host
+        .split('/')
+        .filter(|segment| !segment.is_empty())
+        .collect();
+
+    if segments.len() < 2 {
+        return None;
+    }
+
+    // Drop a single trailing `.git` (clone-URL form). `trim_end_matches` would
+    // strip it repeatedly and turn a name such as `repo.git.git` into `repo`.
+    let owner: String = segments[0].to_string();
+    let repo: String = segments[1].strip_suffix(".git").unwrap_or(segments[1]).to_string();
+
+    if owner.is_empty() || repo.is_empty() {
+        return None;
+    }
+
+    // Bare repo URL: https://github.com/owner/repo
+    if segments.len() == 2 {
+        return Some(ParsedGitHubUrl {
+            owner,
+            repo,
+            git_ref: None,
+            subpath: None,
+            is_file_reference: false,
+        });
+    }
+
+    // Must have /tree/ or /blob/ as the third segment
+    let is_file_reference: bool = match segments[2] {
+        "blob" => true,
+        "tree" => false,
+        _ => return None,
+    };
+
+    // Must have a ref after /tree/ or /blob/
+    if segments.len() < 4 {
+        return None;
+    }
+
+    let git_ref: String = segments[3].to_string();
+    // Everything after the ref is the path inside the repository.
+    let subpath: Option<String> = (segments.len() > 4).then(|| segments[4..].join("/"));
+
+    Some(ParsedGitHubUrl {
+        owner,
+        repo,
+        git_ref: Some(git_ref),
+        subpath,
+        is_file_reference,
+    })
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5038,4 +5120,128 @@ mod tests {
             None => unsafe { env::remove_var(key) },
         }
     }
+
+    // --- GitHub URL parsing tests ---
+
+    #[test]
+    fn parses_bare_github_repo_url() {
+        let result = parse_github_url("https://github.com/user/repo");
+        assert_eq!(
+            result,
+            Some(ParsedGitHubUrl {
+                owner: "user".to_string(),
+                repo: "repo".to_string(),
+                git_ref: None,
+                subpath: None,
+                is_file_reference: false,
+            })
+        );
+    }
+
+    #[test]
+    fn parses_github_tree_url_with_branch() {
+        let result = parse_github_url("https://github.com/user/repo/tree/main");
+        assert_eq!(
+            result,
+            Some(ParsedGitHubUrl {
+                owner: "user".to_string(),
+                repo: "repo".to_string(),
+                git_ref: Some("main".to_string()),
+                subpath: None,
+                is_file_reference: false,
+            })
+        );
+    }
+
+    #[test]
+    fn parses_github_blob_url_with_file_path() {
+        let result = parse_github_url("https://github.com/user/repo/blob/main/docs/guide.md");
+        assert_eq!(
+            result,
+            Some(ParsedGitHubUrl {
+                owner: "user".to_string(),
+                repo: "repo".to_string(),
+                git_ref: Some("main".to_string()),
+                subpath: Some("docs/guide.md".to_string()),
+                is_file_reference: true,
+            })
+        );
+    }
+
+    #[test]
+    fn parses_github_tree_url_with_tag_and_directory() {
+        let result = parse_github_url("https://github.com/user/repo/tree/v2.0/src");
+        assert_eq!(
+            result,
+            Some(ParsedGitHubUrl {
+                owner: "user".to_string(),
+                repo: "repo".to_string(),
+                git_ref: Some("v2.0".to_string()),
+                subpath: Some("src".to_string()),
+                is_file_reference: false,
+            })
+        );
+    }
+
+    #[test]
+    fn parses_github_url_with_dot_git_suffix() {
+        let result = parse_github_url("https://github.com/user/repo.git");
+        assert_eq!(
+            result,
+            Some(ParsedGitHubUrl {
+                owner: "user".to_string(),
+                repo: "repo".to_string(),
+                git_ref: None,
+                subpath: None,
+                is_file_reference: false,
+            })
+        );
+    }
+
+    #[test]
+    fn parses_http_github_url() {
+        let result = parse_github_url("http://github.com/user/repo");
+        assert_eq!(
+            result,
+            Some(ParsedGitHubUrl {
+                owner: "user".to_string(),
+                repo: "repo".to_string(),
+                git_ref: None,
+                subpath: None,
+                is_file_reference: false,
+            })
+        );
+    }
+
+    #[test]
+    fn rejects_non_github_url() {
+        assert_eq!(parse_github_url("https://gitlab.com/user/repo"), None);
+    }
+
+    #[test]
+    fn rejects_malformed_github_url_missing_repo() {
+        assert_eq!(parse_github_url("https://github.com/user"), None);
+    }
+
+    #[test]
+    fn rejects_non_url_input() {
+        assert_eq!(parse_github_url("README.md"), None);
+        assert_eq!(parse_github_url("./docs.zip"), None);
+        assert_eq!(parse_github_url("/some/path"), None);
+    }
+
+    #[test]
+    fn parses_github_url_with_trailing_slash() {
+        let result = parse_github_url("https://github.com/user/repo/");
+        assert_eq!(
+            result,
+            Some(ParsedGitHubUrl {
+                owner: "user".to_string(),
+                repo: "repo".to_string(),
+                git_ref: None,
+                subpath: None,
+                is_file_reference: false,
+            })
+        );
+    }
 }

From 58248be6380b27eaa266048c3a61ea717d0c58ee Mon Sep 17 00:00:00 2001
From: Yonghye Kwon <developer.0hye@gmail.com>
Date: Mon, 9 Mar 2026 15:02:52 +0900
Subject: [PATCH 3/6] feat: add GitHub archive download functions

Add resolve_github_auth_token (GITHUB_TOKEN/GH_TOKEN env vars),
resolve_github_default_branch (GitHub API), and
download_github_archive (zipball endpoint with 256 MB limit).
Reuses existing ureq HTTP patterns. Includes descriptive error
messages for 404, 403/rate-limit, and size exceeded; timeouts and
other failures surface the underlying ureq error text.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
---
 crates/marknest/src/lib.rs | 184 +++++++++++++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)

diff --git a/crates/marknest/src/lib.rs b/crates/marknest/src/lib.rs
index 7768a17..5c6169a 100644
--- a/crates/marknest/src/lib.rs
+++ b/crates/marknest/src/lib.rs
@@ -35,6 +35,9 @@ const REMOTE_ASSET_TIMEOUT_SECONDS: u64 = 15;
 const REMOTE_ASSET_MAX_REDIRECTS: u32 = 5;
 const REMOTE_ASSET_MAX_BYTES: usize = 16 * 1024 * 1024;
 const REMOTE_ASSET_MAX_TOTAL_BYTES: usize = 64 * 1024 * 1024;
+const GITHUB_ARCHIVE_MAX_BYTES: usize = 256 * 1024 * 1024;
+const GITHUB_API_TIMEOUT_SECONDS: u64 = 30;
+const GITHUB_API_MAX_REDIRECTS: u32 = 5;
 
 pub fn run<I, T>(args: I) -> i32
 where
@@ -3732,6 +3735,141 @@ fn parse_github_url(input: &str) -> Option<ParsedGitHubUrl> {
     })
 }
 
+/// Resolve GitHub auth token from environment variables.
+/// Checks GITHUB_TOKEN first, then GH_TOKEN; unset or empty values are skipped.
+fn resolve_github_auth_token() -> Option<String> {
+    ["GITHUB_TOKEN", "GH_TOKEN"]
+        .into_iter()
+        .filter_map(|name| env::var(name).ok())
+        .find(|token| !token.is_empty())
+}
+
+fn build_github_api_agent() -> ureq::Agent {
+    ureq::AgentBuilder::new()
+        .timeout_connect(Duration::from_secs(GITHUB_API_TIMEOUT_SECONDS))
+        .timeout_read(Duration::from_secs(GITHUB_API_TIMEOUT_SECONDS))
+        .timeout_write(Duration::from_secs(GITHUB_API_TIMEOUT_SECONDS))
+        .redirects(GITHUB_API_MAX_REDIRECTS)
+        .build()
+}
+
+/// Send a GET request to the GitHub API and map failures to `AppFailure`.
+///
+/// Sets the `Accept` and `User-Agent` headers used for every GitHub call and
+/// adds a bearer `Authorization` header when `token` is given.
+///
+/// # Errors
+///
+/// - 404: validation failure hinting at GITHUB_TOKEN / GH_TOKEN.
+/// - 403 with `X-RateLimit-Remaining: 0`: validation failure (rate limit).
+/// - Anything else: system failure `Failed to {action}: {error}`, where
+///   `action` names the operation (e.g. "query the GitHub API").
+fn github_get(
+    url: &str,
+    token: Option<&str>,
+    action: &str,
+) -> Result<ureq::Response, AppFailure> {
+    let mut request = build_github_api_agent()
+        .get(url)
+        .set("Accept", "application/vnd.github+json")
+        .set("User-Agent", "marknest");
+
+    if let Some(token) = token {
+        request = request.set("Authorization", &format!("Bearer {token}"));
+    }
+
+    request.call().map_err(|error| match &error {
+        ureq::Error::Status(404, _) => AppFailure::validation(
+            "GitHub repository not found or access denied. Use GITHUB_TOKEN or GH_TOKEN for private repositories.".to_string(),
+        ),
+        ureq::Error::Status(403, response)
+            if response.header("X-RateLimit-Remaining") == Some("0") =>
+        {
+            AppFailure::validation(
+                "GitHub API rate limit exceeded. Set GITHUB_TOKEN or GH_TOKEN to increase the limit.".to_string(),
+            )
+        }
+        // Any other 403, other statuses, and transport errors.
+        _ => AppFailure::system(format!("Failed to {action}: {error}")),
+    })
+}
+
+/// Query the GitHub API for the default branch of a repository.
+///
+/// Reads the `default_branch` string from `GET /repos/{owner}/{repo}`.
+///
+/// # Errors
+///
+/// Request failures are mapped by `github_get`; an unreadable body, invalid
+/// JSON, or a missing `default_branch` string is a system failure.
+fn resolve_github_default_branch(
+    owner: &str,
+    repo: &str,
+    token: Option<&str>,
+) -> Result<String, AppFailure> {
+    let url: String = format!("https://api.github.com/repos/{owner}/{repo}");
+    let response = github_get(&url, token, "query the GitHub API")?;
+
+    let body: String = response.into_string().map_err(|error| {
+        AppFailure::system(format!("Failed to read the GitHub API response: {error}"))
+    })?;
+
+    let json: serde_json::Value = serde_json::from_str(&body).map_err(|error| {
+        AppFailure::system(format!("Failed to parse the GitHub API response: {error}"))
+    })?;
+
+    json["default_branch"]
+        .as_str()
+        .map(|value| value.to_string())
+        .ok_or_else(|| {
+            AppFailure::system(
+                "GitHub API response did not include a default_branch field.".to_string(),
+            )
+        })
+}
+
+/// Download a GitHub repository archive as a ZIP file.
+///
+/// Fetches `/repos/{owner}/{repo}/zipball/{git_ref}` and returns the raw bytes.
+///
+/// # Errors
+///
+/// Request failures are mapped by `github_get`; a read error is a system
+/// failure and an archive above `GITHUB_ARCHIVE_MAX_BYTES` a validation failure.
+fn download_github_archive(
+    owner: &str,
+    repo: &str,
+    git_ref: &str,
+    token: Option<&str>,
+) -> Result<Vec<u8>, AppFailure> {
+    let url: String = format!("https://api.github.com/repos/{owner}/{repo}/zipball/{git_ref}");
+    let response = github_get(&url, token, "download the GitHub archive")?;
+
+    // Allow one byte past the cap so an oversized archive is detectable
+    // without ever buffering more than cap + 1 bytes.
+    let read_limit: u64 = u64::try_from(GITHUB_ARCHIVE_MAX_BYTES)
+        .unwrap_or(u64::MAX)
+        .saturating_add(1);
+
+    // `read_to_end` retries reads that fail with `ErrorKind::Interrupted`.
+    let mut bytes: Vec<u8> = Vec::new();
+    response
+        .into_reader()
+        .take(read_limit)
+        .read_to_end(&mut bytes)
+        .map_err(|error| {
+            AppFailure::system(format!("Failed to download the GitHub archive: {error}"))
+        })?;
+
+    if bytes.len() > GITHUB_ARCHIVE_MAX_BYTES {
+        return Err(AppFailure::validation(
+            "GitHub archive download exceeded the 256 MB limit.".to_string(),
+        ));
+    }
+
+    Ok(bytes)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5121,6 +5259,52 @@ mod tests {
         }
     }
 
+    // --- GitHub auth token resolution tests ---
+    // Combined into one test to avoid env var race conditions in parallel test execution
+
+    #[test]
+    fn resolves_github_auth_token_from_environment() {
+        let original_github = env::var_os("GITHUB_TOKEN");
+        let original_gh = env::var_os("GH_TOKEN");
+
+        // GITHUB_TOKEN takes priority
+        unsafe {
+            env::set_var("GITHUB_TOKEN", "token-from-github");
+            env::remove_var("GH_TOKEN");
+        }
+        assert_eq!(
+            resolve_github_auth_token(),
+            Some("token-from-github".to_string())
+        );
+
+        // Falls back to GH_TOKEN
+        unsafe {
+            env::remove_var("GITHUB_TOKEN");
+            env::set_var("GH_TOKEN", "token-from-gh");
+        }
+        assert_eq!(
+            resolve_github_auth_token(),
+            Some("token-from-gh".to_string())
+        );
+
+        // Returns None when neither is set
+        unsafe {
+            env::remove_var("GITHUB_TOKEN");
+            env::remove_var("GH_TOKEN");
+        }
+        assert_eq!(resolve_github_auth_token(), None);
+
+        // Ignores empty values
+        unsafe {
+            env::set_var("GITHUB_TOKEN", "");
+            env::set_var("GH_TOKEN", "");
+        }
+        assert_eq!(resolve_github_auth_token(), None);
+
+        restore_env_var("GITHUB_TOKEN", original_github);
+        restore_env_var("GH_TOKEN", original_gh);
+    }
+
     // --- GitHub URL parsing tests ---
 
     #[test]

From 1bc0e26237f22670d6d017403fd2cf297acf74ea Mon Sep 17 00:00:00 2001
From: Yonghye Kwon <developer.0hye@gmail.com>
Date: Mon, 9 Mar 2026 15:06:15 +0900
Subject: [PATCH 4/6] feat: integrate GitHub URL support into CLI pipeline

Add GitHubUrl variant to ResolvedInput. resolve_input() detects
GitHub URLs before filesystem access and returns the parsed URL.
analyze_input_path() downloads the archive, saves to a temp file,
and feeds it into the existing ZIP analysis pipeline. Blob URLs
set the implicit entry. Temp directory kept alive via _temp_dir
field on AnalyzedInput.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
---
 crates/marknest/src/lib.rs | 104 +++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

diff --git a/crates/marknest/src/lib.rs b/crates/marknest/src/lib.rs
index 5c6169a..a3b758b 100644
--- a/crates/marknest/src/lib.rs
+++ b/crates/marknest/src/lib.rs
@@ -811,6 +811,7 @@ fn analyze_input_path(input: Option<&Path>) -> Result<AnalyzedInput, AppFailure>
                 workspace_root: Some(workspace_root.clone()),
                 default_output_directory: Some(workspace_root),
                 project_index,
+                _temp_dir: None,
             })
         }
         ResolvedInput::Zip { path, display_path } => {
@@ -842,6 +843,7 @@ fn analyze_input_path(input: Option<&Path>) -> Result<AnalyzedInput, AppFailure>
                 workspace_root: None,
                 default_output_directory,
                 project_index,
+                _temp_dir: None,
             })
         }
         ResolvedInput::Folder {
@@ -867,6 +869,58 @@ fn analyze_input_path(input: Option<&Path>) -> Result<AnalyzedInput, AppFailure>
                 workspace_root: Some(canonical_root.clone()),
                 default_output_directory: Some(canonical_root),
                 project_index,
+                _temp_dir: None,
+            })
+        }
+        ResolvedInput::GitHubUrl {
+            display_path,
+            parsed,
+        } => {
+            let token: Option<String> = resolve_github_auth_token();
+
+            let git_ref: String = match &parsed.git_ref {
+                Some(r) => r.clone(),
+                None => {
+                    resolve_github_default_branch(&parsed.owner, &parsed.repo, token.as_deref())?
+                }
+            };
+
+            eprintln!(
+                "Downloading GitHub archive: {}/{} @ {} ...",
+                parsed.owner, parsed.repo, git_ref
+            );
+            let zip_bytes: Vec<u8> =
+                download_github_archive(&parsed.owner, &parsed.repo, &git_ref, token.as_deref())?;
+
+            // Save to temp file so the existing ZIP pipeline can process it
+            let temp_dir: TempDir = TempDir::new().map_err(|error| {
+                AppFailure::system(format!("Failed to create temp directory: {error}"))
+            })?;
+            let temp_zip_path: PathBuf = temp_dir.path().join("github-archive.zip");
+            fs::write(&temp_zip_path, &zip_bytes).map_err(|error| {
+                AppFailure::system(format!("Failed to write temp archive: {error}"))
+            })?;
+
+            let project_index: ProjectIndex = analyze_zip(&zip_bytes).map_err(map_analyze_error)?;
+
+            // If URL pointed to a specific file (/blob/), use it as implicit entry
+            let explicit_entry: Option<String> = if parsed.is_file_reference {
+                parsed.subpath.clone()
+            } else {
+                None
+            };
+
+            Ok(AnalyzedInput {
+                resolved_input_path: temp_zip_path,
+                input_kind: ValidationInputKind::Zip,
+                input_path: display_path,
+                is_default_input: false,
+                uses_implicit_all: false,
+                explicit_entry,
+                workspace_root: None,
+                default_output_directory: Some(env::current_dir().unwrap_or_default()),
+                project_index,
+                _temp_dir: Some(temp_dir),
             })
         }
     }
@@ -880,6 +934,17 @@ fn resolve_input(input: Option<&Path>) -> Result<ResolvedInput, AppFailure> {
             AppFailure::system(format!("Failed to read the current directory: {error}"))
         })?,
     };
+
+    // Check for GitHub URL before filesystem access
+    if let Some(path_str) = path.to_str() {
+        if let Some(parsed) = parse_github_url(path_str) {
+            return Ok(ResolvedInput::GitHubUrl {
+                display_path: path_str.to_string(),
+                parsed,
+            });
+        }
+    }
+
     let display_path = path.display().to_string();
     let metadata = fs::metadata(&path).map_err(|error| {
         AppFailure::validation(format!(
@@ -2395,6 +2460,10 @@ enum ResolvedInput {
         display_path: String,
         is_default_input: bool,
     },
+    GitHubUrl {
+        display_path: String,
+        parsed: ParsedGitHubUrl,
+    },
 }
 
 #[derive(Debug)]
@@ -2408,6 +2477,9 @@ struct AnalyzedInput {
     workspace_root: Option<PathBuf>,
     default_output_directory: Option<PathBuf>,
     project_index: ProjectIndex,
+    /// Keeps temporary directory alive for the duration of analysis/conversion.
+    /// Used by GitHub URL downloads to hold the temp archive file.
+    _temp_dir: Option<TempDir>,
 }
 
 #[derive(Debug, Clone)]
@@ -5259,6 +5331,38 @@ mod tests {
         }
     }
 
+    // --- GitHub URL resolve_input tests ---
+
+    #[test]
+    fn resolve_input_returns_github_url_variant_for_github_urls() {
+        let path = Path::new("https://github.com/user/repo");
+        let result = resolve_input(Some(path)).expect("should resolve GitHub URL");
+        match result {
+            ResolvedInput::GitHubUrl {
+                display_path,
+                parsed,
+            } => {
+                assert_eq!(display_path, "https://github.com/user/repo");
+                assert_eq!(parsed.owner, "user");
+                assert_eq!(parsed.repo, "repo");
+            }
+            _ => panic!("expected GitHubUrl variant"),
+        }
+    }
+
+    #[test]
+    fn resolve_input_returns_local_type_for_non_url_paths() {
+        let temp_dir = TempDir::new().expect("temp dir");
+        let md_path = temp_dir.path().join("test.md");
+        fs::write(&md_path, "# Test").expect("write");
+
+        let result = resolve_input(Some(&md_path)).expect("should resolve markdown file");
+        match result {
+            ResolvedInput::MarkdownFile { .. } => {}
+            _ => panic!("expected MarkdownFile variant"),
+        }
+    }
+
     // --- GitHub auth token resolution tests ---
     // Combined into one test to avoid env var race conditions in parallel test execution
 

From 86aea19a62fbbcf271e08cc00195aea2abab6a2e Mon Sep 17 00:00:00 2001
From: Yonghye Kwon <developer.0hye@gmail.com>
Date: Mon, 9 Mar 2026 15:09:53 +0900
Subject: [PATCH 5/6] docs: add GitHub URL usage to help text and README

Update convert, validate, and root help messages to document
GitHub URL input support and GITHUB_TOKEN/GH_TOKEN env vars.
Add GitHub URL examples to README development section.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
---
 README.md                  | 11 +++++++++++
 crates/marknest/src/lib.rs |  6 +++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 5ad2a13..f83486a 100644
--- a/README.md
+++ b/README.md
@@ -112,6 +112,17 @@ cargo run -p marknest -- convert ./docs.zip --all --out-dir ./pdf
 cargo run -p marknest -- convert ./docs --out-dir ./pdf --render-report ./out/render-report.json
 ```
 
+Convert directly from a GitHub URL:
+
+```bash
+cargo run -p marknest -- convert https://github.com/user/repo -o output.pdf
+cargo run -p marknest -- convert https://github.com/user/repo/blob/main/docs/guide.md -o guide.pdf
+cargo run -p marknest -- convert https://github.com/user/repo/tree/v2.0 --all --out-dir ./pdf
+cargo run -p marknest -- validate https://github.com/user/repo
+```
+
+GitHub URL support downloads the repository as a ZIP archive through the GitHub API and processes it through the existing ZIP pipeline. Set `GITHUB_TOKEN` or `GH_TOKEN` for private repositories or to avoid API rate limits.
+
 `convert` requires `node`, `npm ci --prefix crates/marknest/playwright-runtime`, and a local Chrome, Edge, or Chromium installation for Playwright headless PDF generation.
 `--mermaid auto|on` and `--math auto|on` use vendored local Mermaid and MathJax runtime assets; when `--debug-html` is written with those modes enabled, a sibling `runtime-assets/` directory is emitted for offline reproduction.
 Supported defaults can come from `.marknest.toml`, `marknest.toml`, `MARKNEST_CONFIG`, `MARKNEST_THEME`, `MARKNEST_CSS`, `MARKNEST_TOC`, and `MARKNEST_SANITIZE_HTML`.
diff --git a/crates/marknest/src/lib.rs b/crates/marknest/src/lib.rs
index a3b758b..541512a 100644
--- a/crates/marknest/src/lib.rs
+++ b/crates/marknest/src/lib.rs
@@ -2311,19 +2311,19 @@ fn parse_convert_args(binary_name: &str, args: &[String]) -> Result<ParseResult,
 
 fn root_help(binary_name: &str) -> String {
     format!(
-        "Convert and validate Markdown workspaces.\n\nUsage:\n  {binary_name} convert [INPUT] [--entry <PATH> | --all] [-o <PATH> | --out-dir <PATH>] [--config <PATH>] [--render-report <PATH>] [--debug-html <PATH>] [--asset-manifest <PATH>] [--css <PATH>] [--header-template <PATH>] [--footer-template <PATH>] [--page-size <a4|letter>] [--margin <MM>] [--margin-top <MM>] [--margin-right <MM>] [--margin-bottom <MM>] [--margin-left <MM>] [--theme <default|github|docs|plain>] [--landscape] [--toc | --no-toc] [--sanitize-html | --no-sanitize-html] [--title <TEXT>] [--author <TEXT>] [--subject <TEXT>] [--mermaid <off|auto|on>] [--math <off|auto|on>] [--mermaid-timeout-ms <MS>] [--math-timeout-ms <MS>]\n  {binary_name} validate [INPUT] [--entry <PATH> | --all] [--strict] [--report <PATH>]\n  {binary_name} --help\n"
+        "Convert and validate Markdown workspaces.\n\nUsage:\n  {binary_name} convert [INPUT] [--entry <PATH> | --all] [-o <PATH> | --out-dir <PATH>] [--config <PATH>] [--render-report <PATH>] [--debug-html <PATH>] [--asset-manifest <PATH>] [--css <PATH>] [--header-template <PATH>] [--footer-template <PATH>] [--page-size <a4|letter>] [--margin <MM>] [--margin-top <MM>] [--margin-right <MM>] [--margin-bottom <MM>] [--margin-left <MM>] [--theme <default|github|docs|plain>] [--landscape] [--toc | --no-toc] [--sanitize-html | --no-sanitize-html] [--title <TEXT>] [--author <TEXT>] [--subject <TEXT>] [--mermaid <off|auto|on>] [--math <off|auto|on>] [--mermaid-timeout-ms <MS>] [--math-timeout-ms <MS>]\n  {binary_name} validate [INPUT] [--entry <PATH> | --all] [--strict] [--report <PATH>]\n  {binary_name} --help\n\nINPUT can be a Markdown file, ZIP archive, folder, or GitHub URL.\n\nGitHub URL examples:\n  {binary_name} convert https://github.com/user/repo -o output.pdf\n  {binary_name} convert https://github.com/user/repo/blob/main/guide.md -o guide.pdf\n  {binary_name} convert https://github.com/user/repo --all --out-dir ./pdf\n\nEnvironment:\n  GITHUB_TOKEN / GH_TOKEN    GitHub auth token for private repos and higher rate limits\n"
     )
 }
 
 fn validate_help(binary_name: &str) -> String {
     format!(
-        "Validate Markdown workspaces and ZIP inputs.\n\nUsage:\n  {binary_name} validate [INPUT] [OPTIONS]\n\nOptions:\n  --entry <PATH>   Validate a single Markdown entry inside a folder or ZIP input.\n  --all            Validate all Markdown entries.\n  --strict         Treat warnings as validation failures.\n  --report <PATH>  Write a JSON validation report.\n  -h, --help       Show this help message.\n"
+        "Validate Markdown workspaces and ZIP inputs.\n\nUsage:\n  {binary_name} validate [INPUT] [OPTIONS]\n\nINPUT can be a Markdown file, ZIP archive, folder, or GitHub URL.\n\nOptions:\n  --entry <PATH>   Validate a single Markdown entry inside a folder or ZIP input.\n  --all            Validate all Markdown entries.\n  --strict         Treat warnings as validation failures.\n  --report <PATH>  Write a JSON validation report.\n  -h, --help       Show this help message.\n\nEnvironment:\n  GITHUB_TOKEN / GH_TOKEN    GitHub auth token for private repos and higher rate limits\n"
     )
 }
 
 fn convert_help(binary_name: &str) -> String {
     format!(
-        "Convert Markdown entries into PDF files.\n\nUsage:\n  {binary_name} convert [INPUT] [OPTIONS]\n\nOptions:\n  --entry <PATH>               Convert one Markdown entry inside a folder or ZIP input.\n  --all                        Convert all Markdown entries.\n  -o, --output <PATH>          Write a single PDF to a specific path.\n  --out-dir <PATH>             Write batch PDF output under a directory.\n  --config <PATH>              Load conversion defaults from a TOML config file.\n  --render-report <PATH>       Write a JSON conversion report.\n  --debug-html <PATH>          Write the rendered HTML used for PDF generation.\n  --asset-manifest <PATH>      Write the selected entry asset manifest as JSON.\n  --css <PATH>                 Append a custom CSS file after the theme stylesheet.\n  --header-template <PATH>     Load an HTML header template for Chromium print output.\n  --footer-template <PATH>     Load an HTML footer template for Chromium print output.\n  --page-size <a4|letter>      Set the output page size.\n  --margin <MM>                Set the same margin on all sides in millimeters.\n  --margin-top <MM>            Override the top page margin in millimeters.\n  --margin-right <MM>          Override the right page margin in millimeters.\n  --margin-bottom <MM>         Override the bottom page margin in millimeters.\n  --margin-left <MM>           Override the left page margin in millimeters.\n  --theme <default|github|docs|plain>\n                               Apply a built-in document theme.\n  --landscape                  Render the PDF in landscape orientation.\n  --toc                        Insert a generated table of contents near the top of the document.\n  --no-toc                     Skip the generated table of contents.\n  --sanitize-html              Sanitize rendered document HTML before PDF generation.\n  --no-sanitize-html           Trust document HTML and skip sanitization.\n  --title <TEXT>               Override the document 
title.\n  --author <TEXT>              Set the PDF author metadata.\n  --subject <TEXT>             Set the PDF subject metadata.\n  --mermaid <off|auto|on>      Control Mermaid rendering.\n  --math <off|auto|on>         Control Math rendering.\n  --mermaid-timeout-ms <MS>    Set the per-diagram Mermaid render timeout.\n  --math-timeout-ms <MS>       Set the per-expression Math render timeout.\n  -h, --help                   Show this help message.\n"
+        "Convert Markdown entries into PDF files.\n\nUsage:\n  {binary_name} convert [INPUT] [OPTIONS]\n\nINPUT can be a Markdown file, ZIP archive, folder, or GitHub URL.\n\nGitHub URL examples:\n  {binary_name} convert https://github.com/user/repo -o output.pdf\n  {binary_name} convert https://github.com/user/repo/blob/main/guide.md -o guide.pdf\n  {binary_name} convert https://github.com/user/repo --all --out-dir ./pdf\n\nOptions:\n  --entry <PATH>               Convert one Markdown entry inside a folder or ZIP input.\n  --all                        Convert all Markdown entries.\n  -o, --output <PATH>          Write a single PDF to a specific path.\n  --out-dir <PATH>             Write batch PDF output under a directory.\n  --config <PATH>              Load conversion defaults from a TOML config file.\n  --render-report <PATH>       Write a JSON conversion report.\n  --debug-html <PATH>          Write the rendered HTML used for PDF generation.\n  --asset-manifest <PATH>      Write the selected entry asset manifest as JSON.\n  --css <PATH>                 Append a custom CSS file after the theme stylesheet.\n  --header-template <PATH>     Load an HTML header template for Chromium print output.\n  --footer-template <PATH>     Load an HTML footer template for Chromium print output.\n  --page-size <a4|letter>      Set the output page size.\n  --margin <MM>                Set the same margin on all sides in millimeters.\n  --margin-top <MM>            Override the top page margin in millimeters.\n  --margin-right <MM>          Override the right page margin in millimeters.\n  --margin-bottom <MM>         Override the bottom page margin in millimeters.\n  --margin-left <MM>           Override the left page margin in millimeters.\n  --theme <default|github|docs|plain>\n                               Apply a built-in document theme.\n  --landscape                  Render the PDF in landscape orientation.\n  --toc                        Insert a generated table of 
contents near the top of the document.\n  --no-toc                     Skip the generated table of contents.\n  --sanitize-html              Sanitize rendered document HTML before PDF generation.\n  --no-sanitize-html           Trust document HTML and skip sanitization.\n  --title <TEXT>               Override the document title.\n  --author <TEXT>              Set the PDF author metadata.\n  --subject <TEXT>             Set the PDF subject metadata.\n  --mermaid <off|auto|on>      Control Mermaid rendering.\n  --math <off|auto|on>         Control Math rendering.\n  --mermaid-timeout-ms <MS>    Set the per-diagram Mermaid render timeout.\n  --math-timeout-ms <MS>       Set the per-expression Math render timeout.\n  -h, --help                   Show this help message.\n\nEnvironment:\n  GITHUB_TOKEN / GH_TOKEN    GitHub auth token for private repos and higher rate limits\n"
     )
 }
 

From 60f60e86e0b50b034b62133680c3ddbb407173ea Mon Sep 17 00:00:00 2001
From: Yonghye Kwon <developer.0hye@gmail.com>
Date: Mon, 9 Mar 2026 15:20:52 +0900
Subject: [PATCH 6/6] fix: make ZIP prefix stripping opt-in for GitHub archives
 only

Regular analyze_zip() no longer strips common prefixes, preserving
existing behavior for user-created ZIPs. New analyze_zip_strip_prefix()
applies stripping only when explicitly requested. The GitHub URL flow
uses the strip variant; regular ZIP inputs are unchanged. This fixes
WASM test failures where intentional subdirectory structure was being
incorrectly stripped.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
---
 crates/marknest-core/src/lib.rs           | 59 +++++++++++++----------
 crates/marknest-core/tests/analyze_zip.rs | 31 +++++++++---
 crates/marknest/src/lib.rs                | 30 +++++++++---
 3 files changed, 80 insertions(+), 40 deletions(-)

diff --git a/crates/marknest-core/src/lib.rs b/crates/marknest-core/src/lib.rs
index a1c03fb..daf5b16 100644
--- a/crates/marknest-core/src/lib.rs
+++ b/crates/marknest-core/src/lib.rs
@@ -269,6 +269,15 @@ pub fn analyze_zip(bytes: &[u8]) -> Result<ProjectIndex, AnalyzeError> {
     analyze_project(&ZipMemoryFileSystem::new(bytes)?)
 }
 
+/// Analyze a ZIP archive, stripping the common top-level directory prefix
+/// from all paths before analysis. Use this for GitHub-style archives where
+/// files are nested under a single `{repo}-{ref}/` directory.
+pub fn analyze_zip_strip_prefix(bytes: &[u8]) -> Result<ProjectIndex, AnalyzeError> {
+    let mut fs = ZipMemoryFileSystem::new(bytes)?;
+    fs.strip_common_prefix();
+    analyze_project(&fs)
+}
+
 fn remote_fetch_url(reference: &str) -> Option<String> {
     if !is_http_reference(reference) {
         return None;
@@ -431,39 +440,39 @@ impl ZipMemoryFileSystem {
             });
         }
 
-        strip_common_prefix(&mut files);
         files.sort_by(|left, right| left.normalized_path.cmp(&right.normalized_path));
         Ok(Self { files })
     }
-}
 
-/// If every file shares the same first path segment (e.g. `repo-main/`),
-/// strip that segment from all paths. This handles GitHub-style archives
-/// that nest everything under `{repo}-{ref}/`.
-fn strip_common_prefix(files: &mut [IndexedFile]) {
-    if files.is_empty() {
-        return;
-    }
+    /// Strip the common first path segment from all files if every file shares
+    /// the same top-level directory. Used for GitHub-style archives that nest
+    /// everything under `{repo}-{ref}/`.
+    fn strip_common_prefix(&mut self) {
+        if self.files.is_empty() {
+            return;
+        }
 
-    let common: String = match files[0].normalized_path.split('/').next() {
-        Some(segment) => segment.to_string(),
-        None => return,
-    };
+        let common: String = match self.files[0].normalized_path.split('/').next() {
+            Some(segment) => segment.to_string(),
+            None => return,
+        };
 
-    // All files must share the same first segment AND have content after it
-    let all_share_prefix = files.iter().all(|file| {
-        file.normalized_path.starts_with(&common)
-            && file.normalized_path.len() > common.len()
-            && file.normalized_path.as_bytes()[common.len()] == b'/'
-    });
+        let all_share_prefix = self.files.iter().all(|file| {
+            file.normalized_path.starts_with(&common)
+                && file.normalized_path.len() > common.len()
+                && file.normalized_path.as_bytes()[common.len()] == b'/'
+        });
 
-    if !all_share_prefix {
-        return;
-    }
+        if !all_share_prefix {
+            return;
+        }
 
-    let strip_len: usize = common.len() + 1; // include the '/'
-    for file in files.iter_mut() {
-        file.normalized_path = file.normalized_path[strip_len..].to_string();
+        let strip_len: usize = common.len() + 1;
+        for file in self.files.iter_mut() {
+            file.normalized_path = file.normalized_path[strip_len..].to_string();
+        }
+        self.files
+            .sort_by(|left, right| left.normalized_path.cmp(&right.normalized_path));
     }
 }
 
diff --git a/crates/marknest-core/tests/analyze_zip.rs b/crates/marknest-core/tests/analyze_zip.rs
index 943ab08..574b0a5 100644
--- a/crates/marknest-core/tests/analyze_zip.rs
+++ b/crates/marknest-core/tests/analyze_zip.rs
@@ -1,6 +1,8 @@
 use std::io::{Cursor, Write};
 
-use marknest_core::{AnalyzeError, EntrySelectionReason, ProjectSourceKind, analyze_zip};
+use marknest_core::{
+    AnalyzeError, EntrySelectionReason, ProjectSourceKind, analyze_zip, analyze_zip_strip_prefix,
+};
 use zip::write::SimpleFileOptions;
 
 fn build_zip(entries: &[(&str, &str)]) -> Vec<u8> {
@@ -80,7 +82,7 @@ fn strips_common_prefix_from_github_style_zip() {
         ("repo-main/images/logo.png", "fake-png-bytes"),
     ]);
 
-    let index = analyze_zip(&bytes).expect("github-style zip should analyze");
+    let index = analyze_zip_strip_prefix(&bytes).expect("github-style zip should analyze");
 
     assert_eq!(index.selected_entry.as_deref(), Some("README.md"));
     assert_eq!(index.entry_selection_reason, EntrySelectionReason::Readme);
@@ -101,13 +103,13 @@ fn strips_common_prefix_from_github_style_zip() {
 }
 
 #[test]
-fn preserves_paths_when_no_common_prefix() {
+fn strip_prefix_preserves_paths_when_no_common_prefix() {
     let bytes = build_zip(&[
         ("README.md", "# Root readme\n"),
         ("docs/guide.md", "# Guide\n"),
     ]);
 
-    let index = analyze_zip(&bytes).expect("zip without common prefix should analyze");
+    let index = analyze_zip_strip_prefix(&bytes).expect("zip without common prefix should analyze");
 
     let candidate_paths: Vec<&str> = index
         .entry_candidates
@@ -119,10 +121,11 @@ fn preserves_paths_when_no_common_prefix() {
 }
 
 #[test]
-fn preserves_paths_when_multiple_top_level_directories() {
+fn strip_prefix_preserves_paths_when_multiple_top_level_directories() {
     let bytes = build_zip(&[("dir-a/README.md", "# A\n"), ("dir-b/README.md", "# B\n")]);
 
-    let index = analyze_zip(&bytes).expect("zip with multiple top dirs should analyze");
+    let index =
+        analyze_zip_strip_prefix(&bytes).expect("zip with multiple top dirs should analyze");
 
     let candidate_paths: Vec<&str> = index
         .entry_candidates
@@ -134,10 +137,10 @@ fn preserves_paths_when_multiple_top_level_directories() {
 }
 
 #[test]
-fn strips_common_prefix_single_nested_file() {
+fn strip_prefix_strips_single_nested_file() {
     let bytes = build_zip(&[("only-dir/file.md", "# Single\n")]);
 
-    let index = analyze_zip(&bytes).expect("single nested file zip should analyze");
+    let index = analyze_zip_strip_prefix(&bytes).expect("single nested file zip should analyze");
 
     assert_eq!(index.selected_entry.as_deref(), Some("file.md"));
     assert_eq!(
@@ -145,3 +148,15 @@ fn strips_common_prefix_single_nested_file() {
         EntrySelectionReason::SingleMarkdownFile
     );
 }
+
+#[test]
+fn regular_analyze_zip_does_not_strip_common_prefix() {
+    let bytes = build_zip(&[
+        ("repo-main/README.md", "# Hello\n"),
+        ("repo-main/images/logo.png", "fake-png-bytes"),
+    ]);
+
+    let index = analyze_zip(&bytes).expect("should analyze without stripping");
+
+    assert_eq!(index.selected_entry.as_deref(), Some("repo-main/README.md"));
+}
diff --git a/crates/marknest/src/lib.rs b/crates/marknest/src/lib.rs
index 541512a..3df4376 100644
--- a/crates/marknest/src/lib.rs
+++ b/crates/marknest/src/lib.rs
@@ -12,7 +12,7 @@ use marknest_core::{
     EntrySelectionReason, MATHJAX_SCRIPT_URL, MATHJAX_VERSION, MERMAID_SCRIPT_URL, MERMAID_VERSION,
     MathMode, MermaidMode, PdfMetadata, ProjectIndex, ProjectSourceKind, RUNTIME_ASSET_MODE,
     RenderHtmlError, RenderOptions, ThemePreset, analyze_workspace, analyze_zip,
-    render_workspace_entry_with_options, rewrite_html_img_sources,
+    analyze_zip_strip_prefix, render_workspace_entry_with_options, rewrite_html_img_sources,
 };
 use serde::{Deserialize, Serialize};
 use tempfile::TempDir;
@@ -268,7 +268,10 @@ fn prepare_render_workspace(
     }
 
     if matches!(analyzed_input.input_kind, ValidationInputKind::Zip) {
-        let temp_dir = materialize_zip_workspace(&analyzed_input.resolved_input_path)?;
+        let temp_dir = materialize_zip_workspace(
+            &analyzed_input.resolved_input_path,
+            analyzed_input.strip_zip_prefix,
+        )?;
         let root = temp_dir.path().to_path_buf();
         return Ok(PreparedWorkspace {
             root,
@@ -281,7 +284,7 @@ fn prepare_render_workspace(
     ))
 }
 
-fn materialize_zip_workspace(zip_path: &Path) -> Result<TempDir, AppFailure> {
+fn materialize_zip_workspace(zip_path: &Path, strip_prefix: bool) -> Result<TempDir, AppFailure> {
     let file = fs::File::open(zip_path).map_err(|error| {
         AppFailure::system(format!(
             "Failed to open ZIP input {}: {error}",
@@ -296,7 +299,7 @@ fn materialize_zip_workspace(zip_path: &Path) -> Result<TempDir, AppFailure> {
         ))
     })?;
 
-    // Collect all entries with normalized paths first to detect a common prefix
+    // Collect all entries with normalized paths first (needed for prefix detection)
     let mut collected_entries: Vec<(String, Vec<u8>)> = Vec::new();
     for index in 0..archive.len() {
         let mut entry = archive.by_index(index).map_err(|error| {
@@ -320,8 +323,12 @@ fn materialize_zip_workspace(zip_path: &Path) -> Result<TempDir, AppFailure> {
         collected_entries.push((normalized_path, contents));
     }
 
-    // Strip common prefix (e.g. GitHub archive `repo-main/` wrapper)
-    let prefix_len = detect_common_prefix_len(&collected_entries);
+    // Only strip the common prefix for GitHub-style archives
+    let prefix_len: usize = if strip_prefix {
+        detect_common_prefix_len(&collected_entries)
+    } else {
+        0
+    };
 
     for (normalized_path, contents) in &collected_entries {
         let stripped_path = &normalized_path[prefix_len..];
@@ -811,6 +818,7 @@ fn analyze_input_path(input: Option<&Path>) -> Result<AnalyzedInput, AppFailure>
                 workspace_root: Some(workspace_root.clone()),
                 default_output_directory: Some(workspace_root),
                 project_index,
+                strip_zip_prefix: false,
                 _temp_dir: None,
             })
         }
@@ -843,6 +851,7 @@ fn analyze_input_path(input: Option<&Path>) -> Result<AnalyzedInput, AppFailure>
                 workspace_root: None,
                 default_output_directory,
                 project_index,
+                strip_zip_prefix: false,
                 _temp_dir: None,
             })
         }
@@ -869,6 +878,7 @@ fn analyze_input_path(input: Option<&Path>) -> Result<AnalyzedInput, AppFailure>
                 workspace_root: Some(canonical_root.clone()),
                 default_output_directory: Some(canonical_root),
                 project_index,
+                strip_zip_prefix: false,
                 _temp_dir: None,
             })
         }
@@ -901,7 +911,9 @@ fn analyze_input_path(input: Option<&Path>) -> Result<AnalyzedInput, AppFailure>
                 AppFailure::system(format!("Failed to write temp archive: {error}"))
             })?;
 
-            let project_index: ProjectIndex = analyze_zip(&zip_bytes).map_err(map_analyze_error)?;
+            // GitHub archives nest files under {repo}-{ref}/, strip that prefix
+            let project_index: ProjectIndex =
+                analyze_zip_strip_prefix(&zip_bytes).map_err(map_analyze_error)?;
 
             // If URL pointed to a specific file (/blob/), use it as implicit entry
             let explicit_entry: Option<String> = if parsed.is_file_reference {
@@ -920,6 +932,7 @@ fn analyze_input_path(input: Option<&Path>) -> Result<AnalyzedInput, AppFailure>
                 workspace_root: None,
                 default_output_directory: Some(env::current_dir().unwrap_or_default()),
                 project_index,
+                strip_zip_prefix: true,
                 _temp_dir: Some(temp_dir),
             })
         }
@@ -2477,6 +2490,9 @@ struct AnalyzedInput {
     workspace_root: Option<PathBuf>,
     default_output_directory: Option<PathBuf>,
     project_index: ProjectIndex,
+    /// Strip common prefix from ZIP paths during materialization.
+    /// Enabled for GitHub archive downloads where files are nested under `{repo}-{ref}/`.
+    strip_zip_prefix: bool,
     /// Keeps temporary directory alive for the duration of analysis/conversion.
     /// Used by GitHub URL downloads to hold the temp archive file.
     _temp_dir: Option<TempDir>,