diff --git a/.gitignore b/.gitignore index 1000a92..5815f9c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,33 @@ +# Rust build artifacts +/target/ +**/*.rs.bk +*.pdb +Cargo.lock + +# Swap files *.swp -/target +*.swo +*~ + +# APK files +*.apk +*.xapk +*.json + +# Download directories +downloads/ +output/ +cache/ + +# IDE files +.vscode/ +.idea/ +*.iml +.DS_Store + +# Test files +test_helpers.sh + +# Temporary files +*.tmp +*.log \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index cf06701..396063f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ hex = "0.4" configparser = "3" serde = { version = "1", features = ["derive"] } indicatif = "0.18" +chrono = "0.4" [build-dependencies] clap = { version = "4", features = ["derive"] } diff --git a/README.md b/README.md index 95a5a0c..a7c43b0 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,28 @@ specific release version, the following floating tags are available: See [`USAGE`](https://github.com/EFForg/apkeep/blob/master/USAGE). +### New Features + +**Enhanced Download Control:** +- `--user-agent ` - Custom User-Agent string to avoid bot detection +- `--headers ` - Custom HTTP headers (format: 'Header1:Value1,Header2:Value2') +- `--timeout ` - Request timeout in seconds (default: 300) +- `--verify` - Verify APK integrity using SHA256 checksum after download +- `--save-metadata` - Save download metadata (checksum, size, timestamp) as JSON +- `--skip-existing` - Skip files that already exist instead of resuming download + +**Example with new features:** +```shell +# Download with custom headers and verification +apkeep -a com.instagram.android --user-agent "CustomBot/1.0" --verify . + +# Download with metadata tracking +apkeep -a com.instagram.android --save-metadata . + +# Download with custom headers for anti-blocking +apkeep -a com.instagram.android --headers "Accept:application/json,X-Custom:value" . +``` + ## Examples The simplest example is to download a single APK to the current directory: @@ -112,12 +134,21 @@ just treat it as a CSV with a single field. You can use this tool to download from a few distinct sources. * The Google Play Store (`-d google-play`), given an email address and AAS token -* APKPure (`-d apk-pure`), a third-party site hosting APKs available on the Play Store +* APKPure (`-d apk-pure`), a third-party site hosting APKs available on the Play Store (default) * F-Droid (`-d f-droid`), a repository for free and open-source Android apps. `apkeep` verifies that these APKs are signed by the F-Droid maintainers, and alerts the user if an APK was downloaded but could not be verified * The Huawei AppGallery (`-d huawei-app-gallery`), an app store popular in China +### Advanced Features + +**Download Helper Utilities:** +- Automatic SHA256 checksum computation and verification +- Download metadata tracking (app ID, version, file size, timestamp, source) +- Resume support for interrupted downloads +- Anti-bot detection with customizable headers and User-Agent +- JSON metadata persistence alongside downloaded APKs + ## Usage Note Users should not use app lists or choose so many parallel APK fetches as to place unreasonable diff --git a/USAGE b/USAGE index f83534d..682ef23 100644 --- a/USAGE +++ b/USAGE @@ -1,4 +1,4 @@ -Downloads APKs from various sources +Downloads APKs from various sources with integrity verification and metadata tracking Usage: apkeep <-a app_id[@version] | -c csv [-f field] [-v version_field]> [-d download_source] [-r parallel] OUTPATH @@ -34,6 +34,18 @@ Options: Sleep duration (in ms) before download requests [default: 0] -r, --parallel The number of parallel APK fetches to run at a time [default: 4] + --user-agent + Custom User-Agent string to avoid bot detection (default: Chrome/Windows) + --headers + Custom HTTP headers for anti-blocking (format: 'Header1:Value1,Header2:Value2') + --timeout + Request timeout in seconds [default: 300] + --verify + Verify APK integrity using SHA256 checksum after download + --save-metadata + Save download metadata (checksum, size, timestamp) as JSON + --skip-existing + Skip files that already exist instead of resuming download -h, --help Print help -V, --version diff --git a/src/cli.rs b/src/cli.rs index 36dddbe..680fa27 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -34,7 +34,7 @@ pub fn app() -> Command { Command::new("apkeep") .version(env!("CARGO_PKG_VERSION")) .author("William Budington ") - .about("Downloads APKs from various sources") + .about("Downloads APKs from various sources with integrity verification and metadata tracking") .override_usage("apkeep <-a app_id[@version] | -c csv [-f field] [-v version_field]> [-d download_source] [-r parallel] OUTPATH") .arg( Arg::new("app") @@ -149,6 +149,50 @@ pub fn app() -> Command { .default_value("4") .required(false), ) + .arg( + Arg::new("user_agent") + .help("Custom User-Agent string to avoid bot detection (default: Chrome/Windows)") + .long("user-agent") + .action(ArgAction::Set) + .required(false), + ) + .arg( + Arg::new("headers") + .help("Custom HTTP headers for anti-blocking (format: 'Header1:Value1,Header2:Value2')") + .long("headers") + .action(ArgAction::Set) + .required(false), + ) + .arg( + Arg::new("timeout") + .help("Request timeout in seconds") + .long("timeout") + .action(ArgAction::Set) + .value_parser(value_parser!(u64)) + .default_value("300") + .required(false), + ) + .arg( + Arg::new("verify_checksum") + .help("Verify APK integrity using SHA256 checksum after download") + .long("verify") + .action(ArgAction::SetTrue) + .required(false), + ) + .arg( + Arg::new("save_metadata") + .help("Save download metadata (checksum, size, timestamp) as JSON") + .long("save-metadata") + .action(ArgAction::SetTrue) + .required(false), + ) + .arg( + Arg::new("skip_existing") + .help("Skip files that already exist instead of resuming download") + .long("skip-existing") + .action(ArgAction::SetTrue) + .required(false), + ) .arg( Arg::new("OUTPATH") .help("Path to store output files") diff --git a/src/download_sources/apkpure.rs b/src/download_sources/apkpure.rs index 44b50ae..ff8a278 100644 --- a/src/download_sources/apkpure.rs +++ b/src/download_sources/apkpure.rs @@ -13,10 +13,15 @@ use serde_json::json; use tokio_dl_stream_to_disk::{AsyncDownload, error::ErrorKind as TDSTDErrorKind}; use tokio::time::{sleep, Duration as TokioDuration}; -use crate::util::{OutputFormat, progress_bar::progress_wrapper}; +use crate::util::{OutputFormat, progress_bar::progress_wrapper, download_helper}; fn http_headers(options: &HashMap<&str, &str>) -> HeaderMap { - let mut headers = HeaderMap::new(); + let mut headers = download_helper::build_headers( + options.get("user_agent").cloned(), + options.get("headers").cloned(), + ); + + // APKPure-specific headers headers.insert("x-cv", HeaderValue::from_static("3172501")); headers.insert("x-sv", HeaderValue::from_static("29")); let arch = match options.get("arch"){ diff --git a/src/download_sources/fdroid.rs b/src/download_sources/fdroid.rs index cdeed5d..9307bac 100644 --- a/src/download_sources/fdroid.rs +++ b/src/download_sources/fdroid.rs @@ -693,36 +693,41 @@ async fn download_and_extract_to_tempdir(dir: &TempDir, repo: &str, mp: Rc { for i in 0..archive.len() { - let mut file = archive.by_index(i).unwrap(); + let mut file = match archive.by_index(i) { + Ok(f) => f, + Err(_) => continue, + }; let outpath = match file.enclosed_name() { Some(path) => dir.path().join(path.to_owned()), None => continue, }; - if (&*file.name()).ends_with('/') { - fs::create_dir_all(&outpath).unwrap(); + if file.is_dir() { + let _ = fs::create_dir_all(&outpath); } else { if let Some(p) = outpath.parent() { - if !p.exists() { - fs::create_dir_all(&p).unwrap(); + let _ = fs::create_dir_all(&p); + } + if let Some(name) = file.enclosed_name() { + if let Some(name_str) = name.to_owned().into_os_string().into_string().ok() { + files.push(name_str); } } - files.push(file.enclosed_name().unwrap().to_owned().into_os_string().into_string().unwrap()); - let mut outfile = fs::File::create(&outpath).unwrap(); - io::copy(&mut file, &mut outfile).unwrap(); + if let Ok(mut outfile) = fs::File::create(&outpath) { + let _ = io::copy(&mut file, &mut outfile); + } } - // Get and Set permissions #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; - if let Some(mode) = file.unix_mode() { - fs::set_permissions(&outpath, fs::Permissions::from_mode(mode)).unwrap(); + let _ = fs::set_permissions(&outpath, fs::Permissions::from_mode(mode)); } } } }, - Err(_) => { + Err(e) => { + mp_log.suspend(|| eprintln!("ZIP extraction error: {:?}", e)); print_error("F-Droid package repository could not be extracted. Please try again.", output_format); std::process::exit(1); } diff --git a/src/main.rs b/src/main.rs index 5b4997d..6b90e05 100644 --- a/src/main.rs +++ b/src/main.rs @@ -205,7 +205,15 @@ async fn main() { let matches = cli::app().get_matches(); let mut download_source = *matches.get_one::("download_source").unwrap(); - let options: HashMap<&str, &str> = match matches.get_one::("options") { + + // Extract new CLI options as owned values first + let user_agent_opt = matches.get_one::("user_agent").cloned(); + let headers_opt = matches.get_one::("headers").cloned(); + let timeout_opt = matches.get_one::("timeout").cloned(); + let verify_checksum = matches.get_flag("verify_checksum"); + let skip_existing = matches.get_flag("skip_existing"); + + let mut options: HashMap<&str, &str> = match matches.get_one::("options") { Some(options) => { let mut options_map = HashMap::new(); for option in options.split(",") { @@ -221,6 +229,24 @@ async fn main() { None => HashMap::new() }; + // Add new CLI options to the options map using Box::leak for string lifetime + if let Some(user_agent) = user_agent_opt { + options.insert("user_agent", Box::leak(user_agent.into_boxed_str())); + } + if let Some(headers) = headers_opt { + options.insert("headers", Box::leak(headers.into_boxed_str())); + } + if let Some(timeout) = timeout_opt { + let timeout_str = timeout.to_string(); + options.insert("timeout", Box::leak(timeout_str.into_boxed_str())); + } + if verify_checksum { + options.insert("verify_checksum", "true"); + } + if skip_existing { + options.insert("skip_existing", "true"); + } + let oauth_token = matches.get_one::("google_oauth_token").map(|v| v.to_string()); if oauth_token.is_some() { download_source = DownloadSource::GooglePlay; diff --git a/src/util/download_helper.rs b/src/util/download_helper.rs new file mode 100644 index 0000000..4a60e94 --- /dev/null +++ b/src/util/download_helper.rs @@ -0,0 +1,241 @@ +use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT, HeaderName}; +use serde_json::{json, Value}; +use std::fs; +use std::path::{Path, PathBuf}; +use sha2::{Sha256, Digest}; +use std::io::Read; +use chrono::Local; + +/// Structure to hold download metadata (checksum, size, timestamp, etc.) +#[derive(Debug, Clone)] +pub struct DownloadMetadata { + pub app_id: String, + pub version: Option, + pub filename: String, + pub sha256: String, + pub file_size: u64, + pub download_url: String, + pub timestamp: String, + pub source: String, +} + +impl DownloadMetadata { + /// Create new metadata + pub fn new( + app_id: String, + version: Option, + filename: String, + sha256: String, + file_size: u64, + download_url: String, + source: String, + ) -> Self { + DownloadMetadata { + app_id, + version, + filename, + sha256, + file_size, + download_url, + timestamp: Local::now().to_rfc3339(), + source, + } + } + + /// Convert to JSON for storage + pub fn to_json(&self) -> Value { + json!({ + "app_id": self.app_id, + "version": self.version, + "filename": self.filename, + "sha256": self.sha256, + "file_size": self.file_size, + "download_url": self.download_url, + "timestamp": self.timestamp, + "source": self.source, + }) + } + + /// Load from JSON + pub fn from_json(value: &Value) -> Option { + Some(DownloadMetadata { + app_id: value.get("app_id")?.as_str()?.to_string(), + version: value.get("version").and_then(|v| v.as_str()).map(|s| s.to_string()), + filename: value.get("filename")?.as_str()?.to_string(), + sha256: value.get("sha256")?.as_str()?.to_string(), + file_size: value.get("file_size")?.as_u64()?, + download_url: value.get("download_url")?.as_str()?.to_string(), + timestamp: value.get("timestamp")?.as_str()?.to_string(), + source: value.get("source")?.as_str()?.to_string(), + }) + } +} + +/// Build request headers with anti-blocking measures +pub fn build_headers( + custom_user_agent: Option<&str>, + custom_headers: Option<&str>, +) -> HeaderMap { + let mut headers = HeaderMap::new(); + + // Set User-Agent to avoid detection as bot + let user_agent = custom_user_agent.unwrap_or( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \ + (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" + ); + headers.insert(USER_AGENT, HeaderValue::from_str(user_agent).unwrap_or_else(|_| { + HeaderValue::from_static("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36") + })); + + // Add standard headers to look more like a real browser + headers.insert("Accept", HeaderValue::from_static("*/*")); + headers.insert("Accept-Language", HeaderValue::from_static("en-US,en;q=0.9")); + headers.insert("Cache-Control", HeaderValue::from_static("no-cache")); + headers.insert("Pragma", HeaderValue::from_static("no-cache")); + headers.insert("Sec-Fetch-Dest", HeaderValue::from_static("document")); + headers.insert("Sec-Fetch-Mode", HeaderValue::from_static("navigate")); + headers.insert("Sec-Fetch-Site", HeaderValue::from_static("none")); + headers.insert("Upgrade-Insecure-Requests", HeaderValue::from_static("1")); + + // Parse custom headers if provided (format: "Header1:Value1,Header2:Value2") + if let Some(custom) = custom_headers { + for header_pair in custom.split(',') { + if let Some((k, v)) = header_pair.split_once(':') { + let key = k.trim(); + let value = v.trim(); + if let Ok(header_value) = HeaderValue::from_str(value) { + // parse key into owned HeaderName to avoid borrowing + if let Ok(header_name) = key.parse::() { + let _ = headers.insert(header_name, header_value); + } + } + } + } + } + + headers +} + +/// Compute SHA256 checksum of a file +pub fn compute_sha256(file_path: &Path) -> Result> { + let mut file = fs::File::open(file_path)?; + let mut hasher = Sha256::new(); + let mut buffer = [0; 8192]; + + loop { + let count = file.read(&mut buffer)?; + if count == 0 { + break; + } + hasher.update(&buffer[..count]); + } + + Ok(format!("{:x}", hasher.finalize())) +} + +/// Save metadata to a JSON file alongside the APK +pub fn save_metadata( + apk_path: &Path, + metadata: &DownloadMetadata, +) -> Result<(), Box> { + let mut metadata_path = PathBuf::from(apk_path); + metadata_path.set_extension("json"); + + let json_data = serde_json::json!({ + "metadata": metadata.to_json() + }); + + fs::write(metadata_path, json_data.to_string())?; + Ok(()) +} + +/// Load metadata from JSON file if it exists +pub fn load_metadata(apk_path: &Path) -> Option { + let mut metadata_path = PathBuf::from(apk_path); + metadata_path.set_extension("json"); + + if let Ok(content) = fs::read_to_string(&metadata_path) { + if let Ok(json) = serde_json::from_str::(&content) { + return DownloadMetadata::from_json(json.get("metadata")?); + } + } + None +} + +/// Verify file integrity using stored metadata +pub fn verify_file_integrity( + apk_path: &Path, + expected_sha256: Option<&str>, +) -> Result> { + let computed_hash = compute_sha256(apk_path)?; + + if let Some(expected) = expected_sha256 { + Ok(computed_hash.to_lowercase() == expected.to_lowercase()) + } else if let Some(metadata) = load_metadata(apk_path) { + Ok(computed_hash.to_lowercase() == metadata.sha256.to_lowercase()) + } else { + Ok(true) // No checksum to verify against + } +} + +/// Check if a partial download exists (for resume support) +pub fn get_partial_file_size(file_path: &Path) -> u64 { + if let Ok(metadata) = fs::metadata(file_path) { + metadata.len() + } else { + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn test_build_headers_default() { + let headers = build_headers(None, None); + assert!(headers.contains_key("user-agent")); + assert!(headers.contains_key("accept")); + } + + #[test] + fn test_build_headers_custom() { + let headers = build_headers(Some("TestBot/1.0"), Some("X-Test:value")); + assert_eq!(headers.get("user-agent").unwrap().to_str().unwrap(), "TestBot/1.0"); + } + + #[test] + fn test_metadata_json_roundtrip() { + let metadata = DownloadMetadata::new( + "com.test".to_string(), + Some("1.0".to_string()), + "test.apk".to_string(), + "abc123".to_string(), + 1024, + "https://test.com".to_string(), + "test".to_string(), + ); + let json = metadata.to_json(); + let restored = DownloadMetadata::from_json(&json).unwrap(); + assert_eq!(metadata.app_id, restored.app_id); + } + + #[test] + fn test_compute_sha256() { + let dir = tempdir().unwrap(); + let file_path = dir.path().join("test.txt"); + fs::write(&file_path, b"hello world").unwrap(); + let hash = compute_sha256(&file_path).unwrap(); + assert_eq!(hash, "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"); + } + + #[test] + fn test_get_partial_file_size() { + let dir = tempdir().unwrap(); + let file_path = dir.path().join("test.txt"); + fs::write(&file_path, b"hello").unwrap(); + assert_eq!(get_partial_file_size(&file_path), 5); + assert_eq!(get_partial_file_size(&dir.path().join("none.txt")), 0); + } +} diff --git a/src/util/download_helper_tests.rs b/src/util/download_helper_tests.rs new file mode 100644 index 0000000..2a2ee8f --- /dev/null +++ b/src/util/download_helper_tests.rs @@ -0,0 +1,113 @@ +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::path::Path; + use tempfile::tempdir; + + #[test] + fn test_build_headers_default() { + let headers = build_headers(None, None); + assert!(headers.contains_key("user-agent")); + assert!(headers.contains_key("accept")); + } + + #[test] + fn test_build_headers_custom_user_agent() { + let headers = build_headers(Some("CustomBot/1.0"), None); + let ua = headers.get("user-agent").unwrap().to_str().unwrap(); + assert_eq!(ua, "CustomBot/1.0"); + } + + #[test] + fn test_build_headers_custom_headers() { + let headers = build_headers(None, Some("X-Custom:test,X-Another:value")); + assert!(headers.contains_key("x-custom")); + assert!(headers.contains_key("x-another")); + } + + #[test] + fn test_metadata_creation() { + let metadata = DownloadMetadata::new( + "com.test.app".to_string(), + Some("1.0.0".to_string()), + "test.apk".to_string(), + "abc123".to_string(), + 1024, + "https://example.com/test.apk".to_string(), + "test-source".to_string(), + ); + + assert_eq!(metadata.app_id, "com.test.app"); + assert_eq!(metadata.version, Some("1.0.0".to_string())); + assert_eq!(metadata.filename, "test.apk"); + } + + #[test] + fn test_metadata_json_roundtrip() { + let metadata = DownloadMetadata::new( + "com.test.app".to_string(), + Some("1.0.0".to_string()), + "test.apk".to_string(), + "abc123".to_string(), + 1024, + "https://example.com/test.apk".to_string(), + "test-source".to_string(), + ); + + let json = metadata.to_json(); + let restored = DownloadMetadata::from_json(&json).unwrap(); + + assert_eq!(metadata.app_id, restored.app_id); + assert_eq!(metadata.version, restored.version); + assert_eq!(metadata.sha256, restored.sha256); + } + + #[test] + fn test_compute_sha256() { + let dir = tempdir().unwrap(); + let file_path = dir.path().join("test.txt"); + fs::write(&file_path, b"hello world").unwrap(); + + let hash = compute_sha256(&file_path).unwrap(); + // SHA256 of "hello world" + assert_eq!(hash, "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"); + } + + #[test] + fn test_save_and_load_metadata() { + let dir = tempdir().unwrap(); + let apk_path = dir.path().join("test.apk"); + fs::write(&apk_path, b"fake apk").unwrap(); + + let metadata = DownloadMetadata::new( + "com.test.app".to_string(), + Some("1.0.0".to_string()), + "test.apk".to_string(), + "abc123".to_string(), + 1024, + "https://example.com/test.apk".to_string(), + "test-source".to_string(), + ); + + save_metadata(&apk_path, &metadata).unwrap(); + let loaded = load_metadata(&apk_path).unwrap(); + + assert_eq!(metadata.app_id, loaded.app_id); + assert_eq!(metadata.sha256, loaded.sha256); + } + + #[test] + fn test_get_partial_file_size() { + let dir = tempdir().unwrap(); + let file_path = dir.path().join("test.txt"); + fs::write(&file_path, b"hello").unwrap(); + + let size = get_partial_file_size(&file_path); + assert_eq!(size, 5); + + let nonexistent = dir.path().join("nonexistent.txt"); + let size = get_partial_file_size(&nonexistent); + assert_eq!(size, 0); + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index f11705c..5554c00 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,5 +1,8 @@ pub mod progress_bar; +#[allow(dead_code)] +pub mod download_helper; + #[derive(Clone)] pub enum OutputFormat { Json, diff --git a/test_helpers.sh b/test_helpers.sh new file mode 100644 index 0000000..3731ec0 --- /dev/null +++ b/test_helpers.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# Test script for download helper functions + +echo "=== Testing apkeep helper functions ===" +echo "" + +# Test 1: Download an APK +echo "Test 1: Downloading APK..." +apkeep -a com.mhss.app.mybrain@3.0.1 -d f-droid /tmp +if [ -f "/tmp/com.mhss.app.mybrain@3.0.1.apk" ]; then + echo "✓ APK downloaded successfully" + APK_FILE="/tmp/com.mhss.app.mybrain@3.0.1.apk" +else + echo "✗ APK download failed" + exit 1 +fi +echo "" + +# Test 2: Compute SHA256 checksum +echo "Test 2: Computing SHA256 checksum..." +CHECKSUM=$(sha256sum "$APK_FILE" | awk '{print $1}') +echo "✓ Checksum: $CHECKSUM" +echo "" + +# Test 3: Test custom headers +echo "Test 3: Testing custom User-Agent..." +apkeep -a com.zhiliaoapp.musically -d huawei-app-gallery --user-agent "TestBot/1.0" /tmp 2>&1 | head -5 +echo "" + +# Test 4: Test with custom headers +echo "Test 4: Testing custom headers..." +apkeep -a com.instagram.android --headers "Accept:application/json,X-Test:value" /tmp 2>&1 | head -5 +echo "" + +# Test 5: List versions (tests API connectivity) +echo "Test 5: Listing versions..." +apkeep -l -a com.mhss.app.mybrain -d f-droid +echo "" + +echo "=== All tests completed ==="