diff --git a/core/metainfo.go b/core/metainfo.go
index 3722ba802..c1d6cb415 100644
--- a/core/metainfo.go
+++ b/core/metainfo.go
@@ -57,21 +57,7 @@ func NewMetaInfo(d Digest, blob io.Reader, pieceLength int64) (*MetaInfo, error)
 	if err != nil {
 		return nil, err
 	}
-	info := info{
-		PieceLength: pieceLength,
-		PieceSums:   pieceSums,
-		Name:        d.Hex(),
-		Length:      length,
-	}
-	h, err := info.Hash()
-	if err != nil {
-		return nil, fmt.Errorf("compute info hash: %s", err)
-	}
-	return &MetaInfo{
-		info:     info,
-		infoHash: h,
-		digest:   d,
-	}, nil
+	return assembleMetaInfo(d, length, pieceSums, pieceLength)
 }
 
 // InfoHash returns the torrent InfoHash.
@@ -173,3 +159,57 @@ func calcPieceSums(blob io.Reader, pieceLength int64) (length int64, pieceSums [
 	}
 	return length, pieceSums, nil
 }
+
+// NewMetaInfoFromBytes is the byte-slice variant of NewMetaInfo. It avoids
+// the io.Reader indirection and the 32 KB scratch buffer per piece that
+// io.Copy allocates when reading from a bytes.Reader wrapped in
+// io.LimitReader, by hashing each piece directly off the underlying slice.
+//
+// Result is bit-identical to NewMetaInfo(d, bytes.NewReader(data),
+// pieceLength). Caller is expected to have already verified that d is the
+// correct digest for data.
+func NewMetaInfoFromBytes(d Digest, data []byte, pieceLength int64) (*MetaInfo, error) {
+	length, pieceSums, err := calcPieceSumsFromBytes(data, pieceLength)
+	if err != nil {
+		return nil, err
+	}
+	return assembleMetaInfo(d, length, pieceSums, pieceLength)
+}
+
+// assembleMetaInfo constructs a MetaInfo from pre-computed piece sums.
+func assembleMetaInfo(d Digest, length int64, pieceSums []uint32, pieceLength int64) (*MetaInfo, error) {
+	info := info{
+		PieceLength: pieceLength,
+		PieceSums:   pieceSums,
+		Name:        d.Hex(),
+		Length:      length,
+	}
+	h, err := info.Hash()
+	if err != nil {
+		return nil, fmt.Errorf("compute info hash: %s", err)
+	}
+	return &MetaInfo{info: info, infoHash: h, digest: d}, nil
+}
+
+// calcPieceSumsFromBytes hashes data in pieceLength chunks. Mirrors
+// calcPieceSums but avoids per-piece hash.Hash32 allocation by using
+// crc32.ChecksumIEEE, and pre-sizes pieceSums to its exact final length.
+func calcPieceSumsFromBytes(data []byte, pieceLength int64) (int64, []uint32, error) {
+	if pieceLength <= 0 {
+		return 0, nil, errors.New("piece length must be positive")
+	}
+	n := int64(len(data))
+	if n == 0 {
+		return 0, nil, nil
+	}
+	numPieces := (n-1)/pieceLength + 1
+	pieceSums := make([]uint32, 0, numPieces)
+	for offset := int64(0); offset < n; offset += pieceLength {
+		end := offset + pieceLength
+		if end > n {
+			end = n
+		}
+		pieceSums = append(pieceSums, PieceSum(data[offset:end]))
+	}
+	return n, pieceSums, nil
+}
diff --git a/core/metainfo_test.go b/core/metainfo_test.go
index 42ec7385c..0df585fd3 100644
--- a/core/metainfo_test.go
+++ b/core/metainfo_test.go
@@ -14,12 +14,14 @@
 package core
 
 import (
+	"bytes"
 	"math/rand"
 	"testing"
 
 	"github.com/stretchr/testify/require"
 
 	"github.com/uber/kraken/utils/memsize"
+	"github.com/uber/kraken/utils/randutil"
 )
 
 func TestMetaInfoGetPieceLength(t *testing.T) {
@@ -118,3 +120,87 @@ func TestMetaInfoSerializationLimit(t *testing.T) {
 		})
 	}
 }
+
+func TestNewMetaInfoFromBytes_MatchesReader(t *testing.T) {
+	cases := []struct {
+		name        string
+		size        uint64
+		pieceLength uint64
+	}{
+		{"empty", 0, 256},
+		{"smaller_than_piece", 100, 256},
+		{"exact_one_piece", 256, 256},
+		{"two_pieces", 512, 256},
+		{"non_aligned", 700, 256},
+		{"many_pieces", 1 << 20, 256},
+		{"piece_length_one", 100, 1},
+		{"piece_larger_than_blob", 100, 1024},
+		{"invalid_piece_length", 100, 0},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			require := require.New(t)
+			data := randutil.Text(tc.size)
+			d, err := NewDigester().FromBytes(data)
+			require.NoError(err)
+
+			miReader, errReader := NewMetaInfo(d, bytes.NewReader(data), int64(tc.pieceLength))
+			miBytes, errBytes := NewMetaInfoFromBytes(d, data, int64(tc.pieceLength))
+			require.Equal(errReader != nil, errBytes != nil)
+			if errReader != nil {
+				require.EqualError(errBytes, errReader.Error())
+				return
+			}
+			require.NoError(errReader)
+			require.NoError(errBytes)
+			require.Equal(miReader.InfoHash(), miBytes.InfoHash())
+			require.Equal(miReader.Length(), miBytes.Length())
+			require.Equal(miReader.NumPieces(), miBytes.NumPieces())
+			require.Equal(miReader.PieceLength(), miBytes.PieceLength())
+			for i := 0; i < miReader.NumPieces(); i++ {
+				require.Equal(miReader.GetPieceSum(i), miBytes.GetPieceSum(i),
+					"piece %d sum mismatch", i)
+			}
+			bReader, err := miReader.Serialize()
+			require.NoError(err)
+			bBytes, err := miBytes.Serialize()
+			require.NoError(err)
+			require.Equal(bReader, bBytes)
+		})
+	}
+}
+
+func BenchmarkNewMetaInfoFromBytes(b *testing.B) {
+	cases := []struct {
+		name        string
+		blobSize    uint64
+		pieceLength uint64
+	}{
+		{"1MB_4pc", 1 << 20, 256 << 10},
+		{"16MB_64pc", 16 << 20, 256 << 10},
+		{"64MB_256pc", 64 << 20, 256 << 10},
+		{"256MB_1024pc", 256 << 20, 256 << 10},
+		{"16MB_4pc_4MBpc", 16 << 20, 4 << 20},
+		{"16MB_16pc_1MBpc", 16 << 20, 1 << 20},
+		{"16MB_1024pc_16KBpc", 16 << 20, 16 << 10},
+	}
+	for _, tc := range cases {
+		b.Run(tc.name, func(b *testing.B) {
+			data := randutil.Text(tc.blobSize)
+			d, err := NewDigester().FromBytes(data)
+			require.NoError(b, err)
+			pl := int64(tc.pieceLength)
+
+			b.ResetTimer()
+			b.ReportAllocs()
+			b.SetBytes(int64(tc.blobSize))
+			for b.Loop() {
+				mi, err := NewMetaInfoFromBytes(d, data, pl)
+				require.NoError(b, err)
+				if mi == nil {
+					b.Fatal("nil MetaInfo")
+				}
+			}
+		})
+	}
+}
diff --git a/core/piece_hash.go b/core/piece_hash.go
index 0880e852f..1d6015f36 100644
--- a/core/piece_hash.go
+++ b/core/piece_hash.go
@@ -22,3 +22,10 @@ import (
 func PieceHash() hash.Hash32 {
 	return crc32.NewIEEE()
 }
+
+// PieceSum returns the checksum of b using the same algorithm as PieceHash.
+// It is equivalent to creating a PieceHash, writing b, and calling Sum32,
+// but avoids allocating a hash.Hash32 object.
+func PieceSum(b []byte) uint32 {
+	return crc32.ChecksumIEEE(b)
+}
diff --git a/lib/store/ca_store.go b/lib/store/ca_store.go
index 0b569af65..169290e04 100644
--- a/lib/store/ca_store.go
+++ b/lib/store/ca_store.go
@@ -14,7 +14,6 @@
 package store
 
 import (
-	"bytes"
 	"container/list"
 	"errors"
 	"fmt"
@@ -299,7 +298,7 @@ func (s *CAStore) generateMetadataFromBytes(name string, data []byte, pieceLengt
 	if err != nil {
 		return nil, fmt.Errorf("new digest from hex: %s", err)
 	}
-	metaInfo, err := core.NewMetaInfo(digest, bytes.NewReader(data), pieceLength)
+	metaInfo, err := core.NewMetaInfoFromBytes(digest, data, pieceLength)
 	if err != nil {
 		return nil, fmt.Errorf("generate metainfo: %w", err)
 	}