Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 55 additions & 15 deletions core/metainfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,7 @@ func NewMetaInfo(d Digest, blob io.Reader, pieceLength int64) (*MetaInfo, error)
if err != nil {
return nil, err
}
info := info{
PieceLength: pieceLength,
PieceSums: pieceSums,
Name: d.Hex(),
Length: length,
}
h, err := info.Hash()
if err != nil {
return nil, fmt.Errorf("compute info hash: %s", err)
}
return &MetaInfo{
info: info,
infoHash: h,
digest: d,
}, nil
return assembleMetaInfo(d, length, pieceSums, pieceLength)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is a 0.5% performance improvement worth the extra complexity we are introducing from extra code?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that, given the code addition is very small and it sits in the critical path of downloading and generating the metainfo, it should benefit us.

}

// InfoHash returns the torrent InfoHash.
Expand Down Expand Up @@ -173,3 +159,57 @@ func calcPieceSums(blob io.Reader, pieceLength int64) (length int64, pieceSums [
}
return length, pieceSums, nil
}

// NewMetaInfoFromBytes is the byte-slice variant of NewMetaInfo. It avoids
// the io.Reader indirection and the 32 KB scratch buffer per piece that
// io.Copy allocates when reading from a bytes.Reader wrapped in
// io.LimitReader, by hashing each piece directly off the underlying slice.
//
// Result is bit-identical to NewMetaInfo(d, bytes.NewReader(data),
// pieceLength). Caller is expected to have already verified that d is the
// correct digest for data.
func NewMetaInfoFromBytes(d Digest, data []byte, pieceLength int64) (*MetaInfo, error) {
Comment thread
sambhav-jain-16 marked this conversation as resolved.
length, pieceSums, err := calcPieceSumsFromBytes(data, pieceLength)
if err != nil {
return nil, err
}
return assembleMetaInfo(d, length, pieceSums, pieceLength)
}

// assembleMetaInfo builds a MetaInfo around pre-computed piece sums,
// hashing the resulting info dict to derive the torrent InfoHash.
func assembleMetaInfo(d Digest, length int64, pieceSums []uint32, pieceLength int64) (*MetaInfo, error) {
	i := info{
		Name:        d.Hex(),
		Length:      length,
		PieceLength: pieceLength,
		PieceSums:   pieceSums,
	}
	h, err := i.Hash()
	if err != nil {
		return nil, fmt.Errorf("compute info hash: %s", err)
	}
	mi := &MetaInfo{
		info:     i,
		infoHash: h,
		digest:   d,
	}
	return mi, nil
}

// calcPieceSumsFromBytes hashes data in pieceLength chunks. Mirrors
// calcPieceSums but avoids per-piece hash.Hash32 allocation by using
// crc32.ChecksumIEEE, and pre-sizes pieceSums to its exact final length.
func calcPieceSumsFromBytes(data []byte, pieceLength int64) (int64, []uint32, error) {
if pieceLength <= 0 {
return 0, nil, errors.New("piece length must be positive")
}
n := int64(len(data))
if n == 0 {
return 0, nil, nil
}
numPieces := (n-1)/pieceLength + 1
pieceSums := make([]uint32, 0, numPieces)
for offset := int64(0); offset < n; offset += pieceLength {
end := offset + pieceLength
if end > n {
end = n
}
pieceSums = append(pieceSums, PieceSum(data[offset:end]))
}
return n, pieceSums, nil
Comment thread
sambhav-jain-16 marked this conversation as resolved.
}
86 changes: 86 additions & 0 deletions core/metainfo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
package core

import (
"bytes"
"math/rand"
"testing"

"github.com/stretchr/testify/require"

"github.com/uber/kraken/utils/memsize"
"github.com/uber/kraken/utils/randutil"
)

func TestMetaInfoGetPieceLength(t *testing.T) {
Expand Down Expand Up @@ -118,3 +120,87 @@ func TestMetaInfoSerializationLimit(t *testing.T) {
})
}
}

func TestNewMetaInfoFromBytes_MatchesReader(t *testing.T) {
cases := []struct {
name string
size uint64
pieceLength uint64
}{
{"empty", 0, 256},
{"smaller_than_piece", 100, 256},
{"exact_one_piece", 256, 256},
{"two_pieces", 512, 256},
{"non_aligned", 700, 256},
{"many_pieces", 1 << 20, 256},
{"piece_length_one", 100, 1},
{"piece_larger_than_blob", 100, 1024},
{"invalid_piece_length", 100, 0},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
require := require.New(t)
data := randutil.Text(tc.size)
d, err := NewDigester().FromBytes(data)
require.NoError(err)

miReader, errReader := NewMetaInfo(d, bytes.NewReader(data), int64(tc.pieceLength))
miBytes, errBytes := NewMetaInfoFromBytes(d, data, int64(tc.pieceLength))
Comment thread
sambhav-jain-16 marked this conversation as resolved.
require.Equal(errReader != nil, errBytes != nil)
if errReader != nil {
require.EqualError(errBytes, errReader.Error())
return
}
require.NoError(errReader)
require.NoError(errBytes)
require.Equal(miReader.InfoHash(), miBytes.InfoHash())
require.Equal(miReader.Length(), miBytes.Length())
require.Equal(miReader.NumPieces(), miBytes.NumPieces())
require.Equal(miReader.PieceLength(), miBytes.PieceLength())
for i := 0; i < miReader.NumPieces(); i++ {
require.Equal(miReader.GetPieceSum(i), miBytes.GetPieceSum(i),
"piece %d sum mismatch", i)
}
bReader, err := miReader.Serialize()
require.NoError(err)
bBytes, err := miBytes.Serialize()
require.NoError(err)
require.Equal(bReader, bBytes)
})
}
}

func BenchmarkNewMetaInfoFromBytes(b *testing.B) {
cases := []struct {
name string
blobSize uint64
Comment thread
sambhav-jain-16 marked this conversation as resolved.
pieceLength uint64
}{
{"1MB_4pc", 1 << 20, 256 << 10},
{"16MB_64pc", 16 << 20, 256 << 10},
{"64MB_256pc", 64 << 20, 256 << 10},
{"256MB_1024pc", 256 << 20, 256 << 10},
{"16MB_4pc_4MBpc", 16 << 20, 4 << 20},
{"16MB_16pc_1MBpc", 16 << 20, 1 << 20},
{"16MB_1024pc_16KBpc", 16 << 20, 16 << 10},
}
for _, tc := range cases {
b.Run(tc.name, func(b *testing.B) {
data := randutil.Text(tc.blobSize)
d, err := NewDigester().FromBytes(data)
require.NoError(b, err)
pl := int64(tc.pieceLength)

b.ResetTimer()
b.ReportAllocs()
b.SetBytes(int64(tc.blobSize))
for b.Loop() {
mi, err := NewMetaInfoFromBytes(d, data, pl)
require.NoError(b, err)
if mi == nil {
b.Fatal("nil MetaInfo")
}
}
})
}
}
7 changes: 7 additions & 0 deletions core/piece_hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,10 @@ import (
func PieceHash() hash.Hash32 {
	// CRC-32 with the IEEE polynomial is the checksum used for torrent
	// pieces throughout this package; see also PieceSum for the one-shot
	// variant.
	h := crc32.NewIEEE()
	return h
}

// PieceSum returns the checksum of b using the same algorithm as PieceHash.
// It produces exactly what creating a PieceHash, writing b, and calling
// Sum32 would, but without allocating a hash.Hash32 object.
func PieceSum(b []byte) uint32 {
	sum := crc32.ChecksumIEEE(b)
	return sum
}
3 changes: 1 addition & 2 deletions lib/store/ca_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
package store

import (
"bytes"
"container/list"
"errors"
"fmt"
Expand Down Expand Up @@ -299,7 +298,7 @@ func (s *CAStore) generateMetadataFromBytes(name string, data []byte, pieceLengt
if err != nil {
return nil, fmt.Errorf("new digest from hex: %s", err)
}
metaInfo, err := core.NewMetaInfo(digest, bytes.NewReader(data), pieceLength)
metaInfo, err := core.NewMetaInfoFromBytes(digest, data, pieceLength)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also use it for metainfogen.GenerateFromBuffer, which currently also creates a new reader?

Copy link
Copy Markdown
Collaborator Author

@sambhav-jain-16 sambhav-jain-16 May 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Im planning to remove that function, no method calls it

if err != nil {
return nil, fmt.Errorf("generate metainfo: %w", err)
}
Expand Down
Loading