diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 015d70b..828f9f9 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -2,13 +2,16 @@ name: E2E Protocol Tests on: workflow_dispatch: + # Run on every PR — stacked PRs against feature branches (e.g. the Unbounded + # outbound stack) should still get live-machine coverage before they merge + # into main. The path filter keeps doc-only PRs from consuming droplets. pull_request: - branches: [main] paths: - '**.go' - 'go.mod' - 'go.sum' - 'Dockerfile' + - '.github/workflows/e2e.yaml' env: DROPLET_NAME: "e2e-lantern-box-${{ github.run_id }}" @@ -39,6 +42,14 @@ jobs: ./cmd ls -lh lantern-box + - name: Build unbounded-rig (linux/amd64) + run: | + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags="-s -w" \ + -o unbounded-rig \ + ./test/e2e/unbounded-rig + ls -lh unbounded-rig + - name: Generate test credentials id: creds run: | @@ -143,20 +154,59 @@ jobs: run: | SSH_OPTS="-o StrictHostKeyChecking=no -o ServerAliveInterval=30 -i e2e_key" - # Compress binary before upload + # Compress binaries before upload gzip -1 -c lantern-box > lantern-box.gz - ls -lh lantern-box.gz + gzip -1 -c unbounded-rig > unbounded-rig.gz + ls -lh lantern-box.gz unbounded-rig.gz + + # Generate a self-signed cert pair for the unbounded rig (freddie TLS + # and the inner QUIC handshake presented by egress). + openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ + -keyout unbounded-rig-key.pem -out unbounded-rig-cert.pem -days 1 \ + -nodes -subj "/CN=${DROPLET_IP}" \ + -addext "subjectAltName=IP:${DROPLET_IP}" # Upload files echo "Starting SCP upload..." - scp $SSH_OPTS lantern-box.gz cert.pem key.pem plain.wasm reflex-client-cert.pem reflex-client-key.pem root@"$DROPLET_IP":/root/ + scp $SSH_OPTS lantern-box.gz unbounded-rig.gz cert.pem key.pem plain.wasm reflex-client-cert.pem reflex-client-key.pem unbounded-rig-cert.pem unbounded-rig-key.pem root@"$DROPLET_IP":/root/ echo "SCP upload complete" - # Decompress and make binary executable - echo "Decompressing binary..." + # Decompress and make binaries executable + echo "Decompressing binaries..." ssh $SSH_OPTS root@"$DROPLET_IP" \ - "gzip -d /root/lantern-box.gz && chmod +x /root/lantern-box" - echo "Binary ready" + "gzip -d /root/lantern-box.gz /root/unbounded-rig.gz && chmod +x /root/lantern-box /root/unbounded-rig" + echo "Binaries ready" + + # Pin DNS to public resolvers. The default DO Ubuntu image points + # /etc/resolv.conf at systemd-resolved (127.0.0.53), which forwards + # to DO's 67.207.67.x infrastructure; we observed that resolver + # break partway through a test run on 2026-05-09, causing the + # later sing-box servers (Reflex + the unbounded-rig's egress) + # to fail DNS lookups for example.com / stun.l.google.com with + # `context deadline exceeded`. The earlier servers had already + # resolved successfully, so the failure was non-deterministic + # and bisected to "DO resolver flake mid-run". + # + # Pinning to 1.1.1.1 + 8.8.8.8 directly bypasses systemd-resolved + # and any DO-side caching layer. We disable systemd-resolved so + # it can't reclaim /etc/resolv.conf on a service restart. + echo "Pinning DNS to public resolvers..." + ssh $SSH_OPTS root@"$DROPLET_IP" bash << 'DNSEOF' + systemctl disable --now systemd-resolved 2>/dev/null || true + rm -f /etc/resolv.conf + cat > /etc/resolv.conf << RESOLV + nameserver 1.1.1.1 + nameserver 8.8.8.8 + options timeout:2 attempts:2 + RESOLV + # Quick sanity check — fail fast if pinning didn't take. + if ! getent hosts example.com >/dev/null; then + echo "DNS pinning sanity check FAILED" >&2 + cat /etc/resolv.conf + exit 1 + fi + echo "DNS pinned: $(grep nameserver /etc/resolv.conf | tr '\n' ' ')" + DNSEOF # Start Python HTTP server to serve plain.wasm (for WATER) echo "Starting WASM HTTP server..." @@ -247,20 +297,24 @@ jobs: JSONEOF scp $SSH_OPTS /tmp/reflex-server.json root@"$DROPLET_IP":/root/reflex-server.json - # Start all 4 server processes + # Start all 4 sing-box protocol servers + the Unbounded rig + # (freddie signaling on 9000 + egress SOCKS5+QUIC/WS on 8000 + + # in-process broflake widget) echo "Starting servers..." ssh $SSH_OPTS root@"$DROPLET_IP" "nohup /root/lantern-box run --config /root/algeneva-server.json > /root/algeneva-server.log 2>&1 < /dev/null & nohup /root/lantern-box run --config /root/samizdat-server.json > /root/samizdat-server.log 2>&1 < /dev/null & nohup /root/lantern-box run --config /root/water-server.json > /root/water-server.log 2>&1 < /dev/null & nohup /root/lantern-box run --config /root/reflex-server.json > /root/reflex-server.log 2>&1 < /dev/null & + TLS_CERT_FILE=/root/unbounded-rig-cert.pem TLS_KEY_FILE=/root/unbounded-rig-key.pem FREDDIE_ADDR=:9000 EGRESS_ADDR=:8000 nohup /root/unbounded-rig > /root/unbounded-rig.log 2>&1 < /dev/null & sleep 1" echo "Servers launched" - # Wait for servers to be ready (WATER needs time to download/compile WASM) + # Wait for servers to be ready (WATER needs time to download/compile + # WASM; unbounded-rig needs freddie + egress TCP to bind). echo "Checking port readiness..." # Note: must use bash explicitly since Ubuntu default shell is dash (no /dev/tcp support) ssh $SSH_OPTS root@"$DROPLET_IP" bash << 'READYEOF' - for port in 9001 9002 9003 9004; do + for port in 9001 9002 9003 9004 9000 8000; do echo "Waiting for port $port to be ready..." for i in $(seq 1 60); do if echo > /dev/tcp/127.0.0.1/$port 2>/dev/null; then @@ -565,6 +619,86 @@ jobs: kill $CLIENT_PID 2>/dev/null || true + - name: Test Unbounded + id: test_unbounded + continue-on-error: true + env: + DROPLET_IP: ${{ steps.droplet.outputs.droplet_ip }} + run: | + # Client config: unbounded outbound talks to the rig's freddie (TLS, + # self-signed, hence insecure_do_not_verify_discovery_cert) and egress + # (plain ws://; inner QUIC handshake is self-signed hence + # insecure_do_not_verify_client_cert). + cat > /tmp/unbounded-client.json << JSONEOF + { + "log": {"level": "debug"}, + "inbounds": [{ + "type": "mixed", + "tag": "mixed-in", + "listen": "127.0.0.1", + "listen_port": 1085 + }], + "outbounds": [{ + "type": "unbounded", + "tag": "unbounded-out", + "discovery_srv": "https://${DROPLET_IP}:9000", + "egress_addr": "ws://${DROPLET_IP}:8000", + "stun_servers": ["stun:stun.l.google.com:19302"], + "stun_batch_size": 1, + "consumer_session_id": "ci-e2e", + "insecure_do_not_verify_client_cert": true, + "insecure_do_not_verify_discovery_cert": true + }], + "route": {"final": "unbounded-out"} + } + JSONEOF + + setsid ./lantern-box run --config /tmp/unbounded-client.json > /tmp/unbounded-client.log 2>&1 & + CLIENT_PID=$! + # Generous startup grace — ICE + DTLS + QUIC handshake on first dial + # can take 10-20s over real network paths. + sleep 15 + + if ! kill -0 $CLIENT_PID 2>/dev/null; then + echo "Unbounded client failed to start" + cat /tmp/unbounded-client.log + exit 1 + fi + + # Test HTTP — the whole chain: mixed inbound -> unbounded outbound + # -> broflake consumer -> WebRTC/DTLS -> broflake widget (on droplet) + # -> QUIC/WS -> egress SOCKS5 (on droplet) -> example.com + set +e + RESPONSE=$(curl -sf -x socks5h://127.0.0.1:1085 -m 60 http://example.com) + CURL_EXIT=$? + set -e + if echo "$RESPONSE" | grep -q "Example Domain"; then + echo "Unbounded HTTP test PASSED" + else + echo "Unbounded HTTP test FAILED (curl exit code: $CURL_EXIT)" + echo "Response: $RESPONSE" + cat /tmp/unbounded-client.log + kill $CLIENT_PID 2>/dev/null || true + exit 1 + fi + + # Test HTTPS + set +e + RESPONSE=$(curl -sf -x socks5h://127.0.0.1:1085 -m 60 https://example.com) + CURL_EXIT=$? + set -e + if echo "$RESPONSE" | grep -q "Example Domain"; then + echo "Unbounded HTTPS test PASSED" + else + echo "Unbounded HTTPS test FAILED (curl exit code: $CURL_EXIT)" + echo "Response: $RESPONSE" + cat /tmp/unbounded-client.log + kill $CLIENT_PID 2>/dev/null || true + exit 1 + fi + + kill $CLIENT_PID 2>/dev/null || true + - name: Check test results if: always() run: | @@ -594,6 +728,12 @@ jobs: echo "Reflex: FAILED" FAILED=1 fi + if [ "${{ steps.test_unbounded.outcome }}" = "success" ]; then + echo "Unbounded: PASSED" + else + echo "Unbounded: FAILED" + FAILED=1 + fi echo "========================" if [ "$FAILED" -ne 0 ]; then echo "One or more tests failed" diff --git a/option/unbounded.go b/option/unbounded.go index e924172..f935548 100644 --- a/option/unbounded.go +++ b/option/unbounded.go @@ -45,6 +45,13 @@ type UnboundedOutboundOptions struct { // WebRTC / signaling parameters. DiscoverySrv string `json:"discovery_srv,omitempty"` DiscoveryEndpoint string `json:"discovery_endpoint,omitempty"` + // InsecureDoNotVerifyDiscoveryCert skips TLS verification of the + // signaling server's (freddie's) cert. Only for test/dev against + // self-signed rigs; production freddie deployments present a real cert + // and this flag must be false. Ignored when a direct transport is + // injected on the context (radiance's production path), which carries + // its own verification policy. + InsecureDoNotVerifyDiscoveryCert bool `json:"insecure_do_not_verify_discovery_cert,omitempty"` GenesisAddr string `json:"genesis_addr,omitempty"` NATFailTimeout int `json:"nat_fail_timeout,omitempty"` // seconds STUNBatchSize int `json:"stun_batch_size,omitempty"` diff --git a/protocol/unbounded/outbound.go b/protocol/unbounded/outbound.go index 89f35eb..fdb6e49 100644 --- a/protocol/unbounded/outbound.go +++ b/protocol/unbounded/outbound.go @@ -31,6 +31,7 @@ package unbounded import ( "context" "crypto/rand" + "crypto/tls" "fmt" "math/big" "net" @@ -149,7 +150,7 @@ func NewOutbound( return nil, fmt.Errorf("unbounded: build pion net shim: %w", err) } rtcOpt.Net = rtcNet - rtcOpt.HTTPClient = signalingClient(ctx, outboundDialer) + rtcOpt.HTTPClient = signalingClient(ctx, outboundDialer, opts.InsecureDoNotVerifyDiscoveryCert) o := &Outbound{ Adapter: outbound.NewAdapterWithDialerOptions( @@ -344,7 +345,7 @@ func (o *Outbound) recordWatchdog(ctx context.Context, elapsed time.Duration) { // 2. Fall back to a plain transport that dials via the outbound's own dialer. // This does NOT bypass the tunnel, so it's only suitable for standalone // sing-box use and tests — not on-device production. -func signalingClient(ctx context.Context, fallback N.Dialer) *http.Client { +func signalingClient(ctx context.Context, fallback N.Dialer, insecureSkipDiscoveryVerify bool) *http.Client { if rt := lbAdapter.DirectTransportFromContext(ctx); rt != nil { return &http.Client{Transport: rt} } @@ -353,6 +354,13 @@ func signalingClient(ctx context.Context, fallback N.Dialer) *http.Client { return fallback.DialContext(ctx, network, M.ParseSocksaddr(addr)) }, } + if insecureSkipDiscoveryVerify { + // Test/dev escape hatch: the standalone/test signaling path accepts a + // self-signed freddie cert. Production operation prefers the direct + // transport returned above and never reaches this branch, so this + // is not a general-purpose TLS relaxation. + tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} //nolint:gosec + } return &http.Client{Transport: tr} } diff --git a/protocol/unbounded/outbound_test.go b/protocol/unbounded/outbound_test.go index 929d4b4..96e55ad 100644 --- a/protocol/unbounded/outbound_test.go +++ b/protocol/unbounded/outbound_test.go @@ -28,7 +28,7 @@ func TestSignalingClient_UsesInjectedTransport(t *testing.T) { rt := http.DefaultTransport ctx := lbAdapter.ContextWithDirectTransport(context.Background(), rt) - client := signalingClient(ctx, nil /* no fallback — should not be called */) + client := signalingClient(ctx, nil /* no fallback — should not be called */, false) req, _ := http.NewRequest("GET", srv.URL, nil) resp, err := client.Do(req) if err != nil { @@ -47,7 +47,7 @@ func TestSignalingClient_UsesInjectedTransport(t *testing.T) { // direct transport on the context, so we fall back to the outbound dialer // via a plain http.Transport. func TestSignalingClient_FallbackWhenNoTransport(t *testing.T) { - client := signalingClient(context.Background(), &noopDialer{}) + client := signalingClient(context.Background(), &noopDialer{}, false) if client == nil || client.Transport == nil { t.Fatal("fallback signaling client should not be nil") } diff --git a/test/e2e/unbounded-rig/main.go b/test/e2e/unbounded-rig/main.go new file mode 100644 index 0000000..efadde8 --- /dev/null +++ b/test/e2e/unbounded-rig/main.go @@ -0,0 +1,229 @@ +// Command unbounded-rig runs freddie (signaling), the broflake egress +// (SOCKS5 over QUIC-over-WebSocket), and a native broflake widget in a single +// process. It exists to provide the server-side topology for a live-machine +// e2e test of lantern-box's unbounded outbound. +// +// The binary is deployed to an ephemeral VM (currently a DigitalOcean +// droplet, see .github/workflows/e2e.yaml). A CI-side lantern-box then points +// its unbounded outbound at the VM's public IP. This validates the full +// chain — real TLS, real STUN, real NAT traversal, a real quic-go QUIC +// transport between two distinct processes — which the in-process test at +// test/e2e/unbounded_test.go cannot. +// +// Wire-compatible knobs: +// +// FREDDIE_ADDR listen address for the signaling server (default :9000) +// EGRESS_ADDR listen address for the egress server (default :8000) +// STUN_SERVER STUN server URL for widget's ICE batch +// (default stun:stun.l.google.com:19302) +// TLS_CERT_FILE PEM path; used by both freddie and egress TLS +// TLS_KEY_FILE PEM path; used by both freddie and egress TLS +// +// The egress wraps a go-socks5 server so SOCKS5 CONNECT requests exit to the +// public internet. The widget is configured with ClientType=widget and +// connects to freddie as a volunteer; it pairs with whichever consumer +// arrives first. +package main + +import ( + "context" + "crypto/tls" + "io" + "log" + "net" + "net/http" + "os" + "os/signal" + "sync" + "syscall" + "time" + + "github.com/armon/go-socks5" + UBClientcore "github.com/getlantern/broflake/clientcore" + UBCommon "github.com/getlantern/broflake/common" + "github.com/getlantern/broflake/egress" + "github.com/getlantern/broflake/freddie" +) + +func main() { + freddieAddr := envDefault("FREDDIE_ADDR", ":9000") + egressAddr := envDefault("EGRESS_ADDR", ":8000") + stunServer := envDefault("STUN_SERVER", "stun:stun.l.google.com:19302") + tlsCertFile := os.Getenv("TLS_CERT_FILE") + tlsKeyFile := os.Getenv("TLS_KEY_FILE") + + if tlsCertFile == "" || tlsKeyFile == "" { + log.Fatal("TLS_CERT_FILE and TLS_KEY_FILE are required") + } + + cert, err := tls.LoadX509KeyPair(tlsCertFile, tlsKeyFile) + if err != nil { + log.Fatalf("load cert/key: %v", err) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigs + log.Println("shutting down") + cancel() + }() + + UBCommon.SetDebugLogger(log.New(os.Stderr, "[broflake] ", log.LstdFlags)) + + // freddie — TLS so the consumer-side kindling transport can hit it. + startFreddie(ctx, freddieAddr, tlsCertFile, tlsKeyFile) + + // egress — SOCKS5 server wrapped in the broflake egress layer. The tls.Config + // passed to egress.NewListener is used for the INNER QUIC handshake (QUIC + // requires TLS 1.3) — not the outer WebSocket. The outer WebSocket is plain + // ws:// over TCP. The consumer validates the inner cert via + // InsecureDoNotVerifyClientCert. + egressURL := startEgress(ctx, egressAddr, cert) + + // Readiness gate — widget won't pair until freddie's /v1/ accepts polls. + // The CI runner also waits on freddie externally, but widget is in-proc + // with freddie so we wait locally before connecting. + waitForFreddie(ctx, "https://127.0.0.1"+firstPort(freddieAddr), 20*time.Second) + + freddieURL := "https://127.0.0.1" + firstPort(freddieAddr) + startWidget(ctx, freddieURL, egressURL, stunServer) + + log.Printf("rig ready: freddie=%s egress=%s", freddieURL, egressURL) + <-ctx.Done() +} + +func startFreddie(ctx context.Context, addr, cert, key string) { + f, err := freddie.New(ctx, addr) + if err != nil { + log.Fatalf("freddie.New: %v", err) + } + go func() { + if err := f.ListenAndServeTLS(cert, key); err != nil && err != http.ErrServerClosed { + log.Printf("freddie exited: %v", err) + } + }() + log.Printf("freddie listening on %s", addr) +} + +func startEgress(ctx context.Context, addr string, cert tls.Certificate) string { + l, err := net.Listen("tcp", addr) + if err != nil { + log.Fatalf("egress listen: %v", err) + } + tlsConfig := &tls.Config{ + Certificates: []tls.Certificate{cert}, + NextProtos: []string{"broflake"}, + InsecureSkipVerify: true, + } + ll, err := egress.NewListener(ctx, l, tlsConfig) + if err != nil { + log.Fatalf("egress.NewListener: %v", err) + } + conf := &socks5.Config{} + proxy, err := socks5.New(conf) + if err != nil { + log.Fatalf("socks5.New: %v", err) + } + go func() { + if err := proxy.Serve(ll); err != nil { + log.Printf("egress SOCKS5 exited: %v", err) + } + }() + log.Printf("egress listening on %s", addr) + return "ws://127.0.0.1" + firstPort(addr) +} + +func startWidget(ctx context.Context, freddieURL, egressURL, stunServer string) { + bfOpt := UBClientcore.NewDefaultBroflakeOptions() + bfOpt.ClientType = "widget" + // Match the in-process test; small pools are fine for a single consumer. + bfOpt.CTableSize = 2 + bfOpt.PTableSize = 2 + + rtcOpt := UBClientcore.NewDefaultWebRTCOptions() + rtcOpt.DiscoverySrv = freddieURL + rtcOpt.STUNBatch = func(_ uint32) ([]string, error) { + return []string{stunServer}, nil + } + // The widget polls freddie over TLS. This rig's freddie presents a + // self-signed cert, so the widget's http client skips verification only + // for this local-rig scenario. Real widgets in production hit a freddie + // deployment with a trusted cert and do not skip verification. + rtcOpt.HTTPClient = &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec + }, + } + + egOpt := UBClientcore.NewDefaultEgressOptions() + egOpt.Addr = egressURL + + _, ui, err := UBClientcore.NewBroflake(bfOpt, rtcOpt, egOpt) + if err != nil { + log.Fatalf("NewBroflake: %v", err) + } + + // Stop on shutdown so the widget releases its freddie registration. + var once sync.Once + stop := func() { once.Do(ui.Stop) } + go func() { + <-ctx.Done() + stop() + }() +} + +func waitForFreddie(ctx context.Context, target string, timeout time.Duration) { + client := &http.Client{ + Timeout: 1 * time.Second, + Transport: &http.Transport{ + // freddie serves a self-signed cert in this local rig, so the + // readiness poll intentionally skips verification when probing that + // rig-managed endpoint. Not a general-purpose TLS policy. + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec + }, + } + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + timer := time.NewTimer(timeout) + defer timer.Stop() + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + // Bail out rather than silently starting the widget against a + // dead freddie — the CI deploy step would otherwise proceed with + // a broken rig and only notice when the client test times out. + log.Fatalf("freddie never became ready at %s within %s", target, timeout) + case <-ticker.C: + resp, err := client.Get(target + "/v1/") + if err == nil { + io.Copy(io.Discard, resp.Body) + resp.Body.Close() + return + } + } + } +} + +func envDefault(key, def string) string { + if v := os.Getenv(key); v != "" { + return v + } + return def +} + +// firstPort extracts ":port" from a listen spec like ":9000" or "0.0.0.0:9000". +// Used to build a loopback URL the rig itself can use for waitForFreddie. +func firstPort(addr string) string { + _, port, err := net.SplitHostPort(addr) + if err != nil { + return addr + } + return ":" + port +} +