developer0hye · developer0hye · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026
diff --git a/README.md b/README.md
@@ -112,6 +112,17 @@ cargo run -p marknest -- convert ./docs.zip --all --out-dir ./pdf
 cargo run -p marknest -- convert ./docs --out-dir ./pdf --render-report ./out/render-report.json
 ```
 
+Convert directly from a GitHub URL:
+
+```bash
+cargo run -p marknest -- convert https://github.com/user/repo -o output.pdf
+cargo run -p marknest -- convert https://github.com/user/repo/blob/main/docs/guide.md -o guide.pdf
+cargo run -p marknest -- convert https://github.com/user/repo/tree/v2.0 --all --out-dir ./pdf
+cargo run -p marknest -- validate https://github.com/user/repo
+```
+
+When given a GitHub URL, marknest downloads the repository as a ZIP archive through the GitHub API and runs it through the existing ZIP pipeline. Set `GITHUB_TOKEN` or `GH_TOKEN` to access private repositories or to get the higher API rate limit granted to authenticated requests.
+
 `convert` requires `node`, `npm ci --prefix crates/marknest/playwright-runtime`, and a local Chrome, Edge, or Chromium installation for Playwright headless PDF generation.
 `--mermaid auto|on` and `--math auto|on` use vendored local Mermaid and MathJax runtime assets; when `--debug-html` is written with those modes enabled, a sibling `runtime-assets/` directory is emitted for offline reproduction.
 Supported defaults can come from `.marknest.toml`, `marknest.toml`, `MARKNEST_CONFIG`, `MARKNEST_THEME`, `MARKNEST_CSS`, `MARKNEST_TOC`, and `MARKNEST_SANITIZE_HTML`.

diff --git a/crates/marknest-core/src/lib.rs b/crates/marknest-core/src/lib.rs
@@ -269,6 +269,15 @@ pub fn analyze_zip(bytes: &[u8]) -> Result<ProjectIndex, AnalyzeError> {
     analyze_project(&ZipMemoryFileSystem::new(bytes)?)
 }
 
+/// Analyzes a GitHub-style ZIP archive as if its contents sat at the archive root.
+///
+/// Such archives nest every file under a single `{repo}-{ref}/` directory.
+/// When all files in `bytes` share one top-level directory, that directory is
+/// removed from every path before analysis; otherwise the paths are analyzed
+/// exactly as stored.
+///
+/// # Errors
+///
+/// Propagates the [`AnalyzeError`] produced while reading the archive or while
+/// analyzing the resulting project.
+pub fn analyze_zip_strip_prefix(bytes: &[u8]) -> Result<ProjectIndex, AnalyzeError> {
+    let mut archive = ZipMemoryFileSystem::new(bytes)?;
+    archive.strip_common_prefix();
+    analyze_project(&archive)
+}
+
 fn remote_fetch_url(reference: &str) -> Option<String> {
     if !is_http_reference(reference) {
         return None;
@@ -434,6 +443,37 @@ impl ZipMemoryFileSystem {
         files.sort_by(|left, right| left.normalized_path.cmp(&right.normalized_path));
         Ok(Self { files })
     }
+
+    /// Removes the shared top-level directory from every file path.
+    ///
+    /// GitHub-style archives nest everything under a single `{repo}-{ref}/`
+    /// directory. If every file path starts with the same first segment
+    /// followed by `/`, that segment and the slash are dropped from all paths.
+    /// If the archive is empty, the first file has no directory component, or
+    /// any file lies outside that directory, the paths are left untouched.
+    fn strip_common_prefix(&mut self) {
+        // Candidate prefix *including* the trailing slash (e.g. `repo-main/`),
+        // taken from the first file. Matching on the slash-terminated form
+        // keeps `repo-main-extra/x` or a bare `repo-main` file from counting
+        // as being inside `repo-main/`.
+        let prefix: String = match self
+            .files
+            .first()
+            .and_then(|file| file.normalized_path.split_once('/'))
+        {
+            Some((top_level, _rest)) => format!("{}/", top_level),
+            None => return,
+        };
+
+        let all_share_prefix = self
+            .files
+            .iter()
+            .all(|file| file.normalized_path.starts_with(&prefix));
+        if !all_share_prefix {
+            return;
+        }
+
+        // Every path starts with `prefix`, so `prefix.len()` is a valid char
+        // boundary in each of them; trim in place instead of reallocating.
+        for file in self.files.iter_mut() {
+            file.normalized_path.drain(..prefix.len());
+        }
+        // Re-sort so the list stays ordered by normalized path.
+        self.files
+            .sort_by(|left, right| left.normalized_path.cmp(&right.normalized_path));
+    }
 }
 
 impl IndexedFileSystem for ZipMemoryFileSystem {

diff --git a/crates/marknest-core/tests/analyze_zip.rs b/crates/marknest-core/tests/analyze_zip.rs
@@ -1,6 +1,8 @@
 use std::io::{Cursor, Write};
 
-use marknest_core::{AnalyzeError, EntrySelectionReason, ProjectSourceKind, analyze_zip};
+use marknest_core::{
+    AnalyzeError, EntrySelectionReason, ProjectSourceKind, analyze_zip, analyze_zip_strip_prefix,
+};
 use zip::write::SimpleFileOptions;
 
 fn build_zip(entries: &[(&str, &str)]) -> Vec<u8> {
@@ -69,3 +71,92 @@ fn rejects_windows_drive_paths_inside_zip() {
         }
     );
 }
+
+/// A GitHub-style archive (everything under `repo-main/`) is analyzed as if
+/// its files lived at the archive root, including relative asset links.
+#[test]
+fn strips_common_prefix_from_github_style_zip() {
+    let archive = build_zip(&[
+        (
+            "repo-main/README.md",
+            "# Hello\n\n![Logo](./images/logo.png)\n",
+        ),
+        ("repo-main/images/logo.png", "fake-png-bytes"),
+    ]);
+
+    let project = analyze_zip_strip_prefix(&archive).expect("github-style zip should analyze");
+
+    // The README is selected under its prefix-free path.
+    assert_eq!(project.selected_entry.as_deref(), Some("README.md"));
+    assert_eq!(project.entry_selection_reason, EntrySelectionReason::Readme);
+
+    let mut candidates: Vec<&str> = Vec::new();
+    for candidate in project.entry_candidates.iter() {
+        candidates.push(candidate.path.as_str());
+    }
+    assert_eq!(candidates, vec!["README.md"]);
+
+    // The image reference resolves against the stripped layout.
+    let mut resolved: Vec<Option<&str>> = Vec::new();
+    for asset in project.assets.iter() {
+        resolved.push(asset.resolved_path.as_deref());
+    }
+    assert_eq!(resolved, vec![Some("images/logo.png")]);
+}
+
+/// With a file at the archive root there is no shared top-level directory,
+/// so every path must come through unchanged.
+#[test]
+fn strip_prefix_preserves_paths_when_no_common_prefix() {
+    let archive = build_zip(&[
+        ("README.md", "# Root readme\n"),
+        ("docs/guide.md", "# Guide\n"),
+    ]);
+
+    let project =
+        analyze_zip_strip_prefix(&archive).expect("zip without common prefix should analyze");
+
+    let has_candidate = |expected: &str| {
+        project
+            .entry_candidates
+            .iter()
+            .any(|candidate| candidate.path.as_str() == expected)
+    };
+    assert!(has_candidate("README.md"));
+    assert!(has_candidate("docs/guide.md"));
+}
+
+/// Two distinct top-level directories mean there is nothing common to strip;
+/// both original paths must survive.
+#[test]
+fn strip_prefix_preserves_paths_when_multiple_top_level_directories() {
+    let archive = build_zip(&[("dir-a/README.md", "# A\n"), ("dir-b/README.md", "# B\n")]);
+
+    let project =
+        analyze_zip_strip_prefix(&archive).expect("zip with multiple top dirs should analyze");
+
+    let has_candidate = |expected: &str| {
+        project
+            .entry_candidates
+            .iter()
+            .any(|candidate| candidate.path.as_str() == expected)
+    };
+    assert!(has_candidate("dir-a/README.md"));
+    assert!(has_candidate("dir-b/README.md"));
+}
+
+/// A lone file inside one directory still counts as "all files share a
+/// top-level directory", so the directory is stripped.
+#[test]
+fn strip_prefix_strips_single_nested_file() {
+    let archive = build_zip(&[("only-dir/file.md", "# Single\n")]);
+
+    let project =
+        analyze_zip_strip_prefix(&archive).expect("single nested file zip should analyze");
+
+    let selected: Option<&str> = project.selected_entry.as_deref();
+    assert_eq!(selected, Some("file.md"));
+    assert_eq!(
+        project.entry_selection_reason,
+        EntrySelectionReason::SingleMarkdownFile
+    );
+}
+
+/// Guard for the plain `analyze_zip` entry point: it must keep the
+/// top-level directory in the reported paths.
+#[test]
+fn regular_analyze_zip_does_not_strip_common_prefix() {
+    let archive = build_zip(&[
+        ("repo-main/README.md", "# Hello\n"),
+        ("repo-main/images/logo.png", "fake-png-bytes"),
+    ]);
+
+    let project = analyze_zip(&archive).expect("should analyze without stripping");
+
+    let selected: Option<&str> = project.selected_entry.as_deref();
+    assert_eq!(selected, Some("repo-main/README.md"));
+}