From ae256a687a45ed56f329fd0808050f31cd428b52 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Mon, 14 Feb 2022 15:15:14 +0900 Subject: [PATCH] Allow optimizing images with prioritiezed files info shared via registry Signed-off-by: Kohei Tokunaga --- analyzer/analyzer.go | 2 +- analyzer/recorder/images.go | 253 ++++++++++++++++++++ analyzer/recorder/recorder.go | 2 - analyzer/recorder/util.go | 263 +++++++++++++++++++++ cmd/ctr-remote/commands/convert.go | 132 +++++++++-- cmd/ctr-remote/commands/optimize.go | 51 ++-- docs/ctr-remote.md | 175 ++++++++++++++ estargz/estargz.go | 9 + nativeconverter/estargz/estargz.go | 10 +- nativeconverter/zstdchunked/zstdchunked.go | 21 +- script/optimize/optimize/entrypoint.sh | 71 ++++-- script/optimize/test.sh | 10 +- util/containerdutil/manifest.go | 18 +- 13 files changed, 931 insertions(+), 86 deletions(-) create mode 100644 analyzer/recorder/images.go create mode 100644 analyzer/recorder/util.go diff --git a/analyzer/analyzer.go b/analyzer/analyzer.go index 3dbc06054..df5831daa 100644 --- a/analyzer/analyzer.go +++ b/analyzer/analyzer.go @@ -188,7 +188,7 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts .. }) // Start to monitor "/" and run the task. - rc, err := recorder.NewImageRecorder(ctx, cs, img, platforms.Default()) + rc, err := recorder.NewImageRecorder(ctx, cs, img, platforms.DefaultStrict()) if err != nil { return "", err } diff --git a/analyzer/recorder/images.go b/analyzer/recorder/images.go new file mode 100644 index 000000000..7d151097b --- /dev/null +++ b/analyzer/recorder/images.go @@ -0,0 +1,253 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package recorder + +import ( + "archive/tar" + "compress/gzip" + "context" + "encoding/json" + "fmt" + "io" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/archive/compression" + "github.com/containerd/containerd/content" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/images" + "github.com/containerd/containerd/labels" + "github.com/containerd/containerd/platforms" + "github.com/containerd/stargz-snapshotter/util/containerdutil" + "github.com/opencontainers/go-digest" + ocispecVersion "github.com/opencontainers/image-spec/specs-go" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +const recordJSON = "stargz.record.json" + +// RecordOutToImage writes the specified record out blob as an image. 
+//
+// The record blob identified by recordOutDgst is read from the content store
+// and wrapped into a gzip-compressed tar layer that holds a single entry named
+// "stargz.record.json". A config and a manifest referencing that layer are
+// written next, and finally an image named ref pointing at the manifest is
+// created (an already existing image with that name is deleted first).
+//
+// It returns the created image. On failure the returned image is nil.
+func RecordOutToImage(ctx context.Context, client *containerd.Client, recordOutDgst digest.Digest, ref string) (*images.Image, error) {
+	cs := client.ContentStore()
+	is := client.ImageService()
+
+	// Write blob
+	ra, err := cs.ReaderAt(ctx, ocispec.Descriptor{Digest: recordOutDgst})
+	if err != nil {
+		return nil, err
+	}
+	defer ra.Close()
+	recordSize := ra.Size()
+	sr := io.NewSectionReader(ra, 0, recordSize)
+	blobW, err := content.OpenWriter(ctx, cs, content.WithRef(fmt.Sprintf("recording-ref-%s", recordOutDgst)))
+	if err != nil {
+		return nil, err
+	}
+	defer blobW.Close()
+	if err := blobW.Truncate(0); err != nil {
+		return nil, err
+	}
+	zw := gzip.NewWriter(blobW)
+	defer zw.Close()
+	// The tar stream is hashed before compression; that digest is stored as
+	// the layer's uncompressed label and as the DiffID in the config below.
+	diffID := digest.Canonical.Digester()
+	tw := tar.NewWriter(io.MultiWriter(zw, diffID.Hash()))
+	if err := tw.WriteHeader(&tar.Header{
+		Name:     recordJSON,
+		Typeflag: tar.TypeReg,
+		Size:     recordSize,
+	}); err != nil {
+		return nil, err
+	}
+	if _, err := io.CopyN(tw, sr, recordSize); err != nil {
+		return nil, err
+	}
+	// Both writers must be closed (flushed) before committing the blob.
+	if err := tw.Close(); err != nil {
+		return nil, err
+	}
+	if err := zw.Close(); err != nil {
+		return nil, err
+	}
+	blobLabels := map[string]string{
+		labels.LabelUncompressed: diffID.Digest().String(),
+	}
+	if err := blobW.Commit(ctx, 0, "", content.WithLabels(blobLabels)); err != nil && !errdefs.IsAlreadyExists(err) {
+		return nil, err
+	}
+	blobInfo, err := cs.Info(ctx, blobW.Digest())
+	if err != nil {
+		return nil, err
+	}
+	blobDesc := ocispec.Descriptor{
+		MediaType: ocispec.MediaTypeImageLayerGzip,
+		Digest:    blobInfo.Digest,
+		Size:      blobInfo.Size,
+	}
+	if err := blobW.Close(); err != nil {
+		return nil, err
+	}
+
+	// Write config
+	configW, err := content.OpenWriter(ctx, cs, content.WithRef(fmt.Sprintf("recording-ref-config-%s", recordOutDgst)))
+	if err != nil {
+		return nil, err
+	}
+	defer configW.Close()
+	spec := platforms.DefaultSpec()
+	if err := json.NewEncoder(configW).Encode(ocispec.Image{
+		Architecture: spec.Architecture,
+		OS:           spec.OS,
+		RootFS: ocispec.RootFS{
+			Type:    "layers",
+			DiffIDs: []digest.Digest{diffID.Digest()},
+		},
+	}); err != nil {
+		return nil, err
+	}
+	if err := configW.Commit(ctx, 0, ""); err != nil && !errdefs.IsAlreadyExists(err) {
+		return nil, err
+	}
+	configInfo, err := cs.Info(ctx, configW.Digest())
+	if err != nil {
+		return nil, err
+	}
+	configDesc := ocispec.Descriptor{
+		MediaType: ocispec.MediaTypeImageConfig,
+		Digest:    configInfo.Digest,
+		Size:      configInfo.Size,
+	}
+	if err := configW.Close(); err != nil {
+		return nil, err
+	}
+
+	// Write manifest
+	manifestW, err := content.OpenWriter(ctx, cs, content.WithRef(fmt.Sprintf("recording-ref-manifest-%s", recordOutDgst)))
+	if err != nil {
+		return nil, err
+	}
+	defer manifestW.Close()
+	if err := json.NewEncoder(manifestW).Encode(ocispec.Manifest{
+		Versioned: ocispecVersion.Versioned{
+			SchemaVersion: 2,
+		},
+		MediaType: ocispec.MediaTypeImageManifest,
+		Config:    configDesc,
+		Layers:    []ocispec.Descriptor{blobDesc},
+	}); err != nil {
+		return nil, err
+	}
+	// The GC reference labels tie the config and the layer blob to the manifest.
+	if err := manifestW.Commit(ctx, 0, "", content.WithLabels(map[string]string{
+		"containerd.io/gc.ref.content.record.config": configDesc.Digest.String(),
+		"containerd.io/gc.ref.content.record.blob":   blobDesc.Digest.String(),
+	})); err != nil && !errdefs.IsAlreadyExists(err) {
+		return nil, err
+	}
+	manifestInfo, err := cs.Info(ctx, manifestW.Digest())
+	if err != nil {
+		return nil, err
+	}
+	manifestDesc := ocispec.Descriptor{
+		MediaType: ocispec.MediaTypeImageManifest,
+		Digest:    manifestInfo.Digest,
+		Size:      manifestInfo.Size,
+	}
+	if err := manifestW.Close(); err != nil {
+		return nil, err
+	}
+
+	// Write image
+	// Removing a previous image with the same name is best-effort: the error
+	// is deliberately ignored (the image usually does not exist yet). A
+	// leftover image is expected to surface as an error from Create below.
+	_ = is.Delete(ctx, ref)
+	res, err := is.Create(ctx, images.Image{
+		Name:   ref,
+		Target: manifestDesc,
+	})
+	if err != nil {
+		// Do not hand out a pointer to a zero-valued image together with an error.
+		return nil, err
+	}
+	return &res, nil
+}
+
+// RecordInFromImage gets a record out file from the specified image.
+//
+// The image ref must resolve, for the given platform, to a manifest that has
+// exactly one layer. That layer is decompressed and its tar stream is searched
+// for the "stargz.record.json" entry, whose contents are written to the content
+// store. The digest of the written record blob is returned.
+func RecordInFromImage(ctx context.Context, client *containerd.Client, ref string, platform platforms.MatchComparer) (digest.Digest, error) {
+	is := client.ImageService()
+	cs := client.ContentStore()
+
+	i, err := is.Get(ctx, ref)
+	if err != nil {
+		return "", err
+	}
+
+	manifestDesc, err := containerdutil.ManifestDesc(ctx, cs, i.Target, platform)
+	if err != nil {
+		return "", err
+	}
+	p, err := content.ReadBlob(ctx, cs, manifestDesc)
+	if err != nil {
+		return "", err
+	}
+	var manifest ocispec.Manifest
+	if err := json.Unmarshal(p, &manifest); err != nil {
+		return "", err
+	}
+	if len(manifest.Layers) != 1 {
+		return "", fmt.Errorf("record image must have 1 layer")
+	}
+	recordOut := manifest.Layers[0]
+
+	ra, err := cs.ReaderAt(ctx, recordOut)
+	if err != nil {
+		return "", err
+	}
+	defer ra.Close()
+	dr, err := compression.DecompressStream(io.NewSectionReader(ra, 0, ra.Size()))
+	if err != nil {
+		return "", err
+	}
+	// The decompressor is an io.ReadCloser; release it once the record is copied.
+	defer dr.Close()
+
+	// Locate the record entry. On a hit, tr is left positioned at the entry's
+	// data, so it can be used directly as the reader for the copy below.
+	var recordOutR io.Reader
+	var recordOutSize int64
+	tr := tar.NewReader(dr)
+	for {
+		h, err := tr.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return "", err
+		}
+		if cleanEntryName(h.Name) == recordJSON {
+			recordOutR, recordOutSize = tr, h.Size
+			break
+		}
+	}
+	if recordOutR == nil {
+		return "", fmt.Errorf("failed to find record file")
+	}
+
+	recordW, err := content.OpenWriter(ctx, cs, content.WithRef(fmt.Sprintf("recording-in-ref-%s", manifestDesc.Digest)))
+	if err != nil {
+		return "", err
+	}
+	defer recordW.Close()
+	if err := recordW.Truncate(0); err != nil {
+		return "", err
+	}
+	if _, err := io.CopyN(recordW, recordOutR, recordOutSize); err != nil {
+		return "", err
+	}
+	// An already existing blob with the same contents is not an error.
+	if err := recordW.Commit(ctx, 0, ""); err != nil && !errdefs.IsAlreadyExists(err) {
+		return "", err
+	}
+	dgst := recordW.Digest()
+	if err := recordW.Close(); err != nil {
+		return "", err
+	}
+	return dgst, nil
+}
diff --git a/analyzer/recorder/recorder.go b/analyzer/recorder/recorder.go
index 9da734409..fb8f1cdf6 100644
---
a/analyzer/recorder/recorder.go +++ b/analyzer/recorder/recorder.go @@ -31,7 +31,6 @@ import ( "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/images" "github.com/containerd/containerd/images/converter/uncompress" - "github.com/containerd/containerd/log" "github.com/containerd/containerd/platforms" "github.com/containerd/stargz-snapshotter/recorder" "github.com/containerd/stargz-snapshotter/util/containerdutil" @@ -84,7 +83,6 @@ func imageRecorderFromManifest(ctx context.Context, cs content.Store, manifestDe // TODO: During optimization, we uncompress the blob several times (here and during // creating eStargz layer). We should unify this process for better optimization // performance. - log.G(ctx).Infof("analyzing blob %q", desc.Digest) readerAt, err := cs.ReaderAt(ctx, desc) if err != nil { return nil, fmt.Errorf("failed to get reader blob %v: %w", desc.Digest, err) diff --git a/analyzer/recorder/util.go b/analyzer/recorder/util.go new file mode 100644 index 000000000..6034b99c5 --- /dev/null +++ b/analyzer/recorder/util.go @@ -0,0 +1,263 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/
+
+package recorder
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"sync"
+
+	"github.com/containerd/containerd"
+	"github.com/containerd/containerd/content"
+	"github.com/containerd/containerd/images/converter"
+	"github.com/containerd/containerd/platforms"
+	"github.com/containerd/stargz-snapshotter/estargz"
+	"github.com/containerd/stargz-snapshotter/estargz/zstdchunked"
+	"github.com/containerd/stargz-snapshotter/recorder"
+	"github.com/containerd/stargz-snapshotter/util/containerdutil"
+	"github.com/opencontainers/go-digest"
+	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sync/errgroup"
+)
+
+// CopyRecordFromImage derives prioritized files for the image targetRef from
+// the already optimized image recordInRef.
+//
+// For every manifest of targetRef selected by platform, the layers of a
+// manifest of recordInRef are scanned and the names of their non-empty entries
+// located at or before the prefetch landmark are recorded against the target
+// manifest. The result maps layer digests of the target image to the paths to
+// prioritize in that layer; layers without any such path are omitted.
+func CopyRecordFromImage(ctx context.Context, client *containerd.Client, recordInRef, targetRef string, platform platforms.MatchComparer) (map[digest.Digest][]string, error) {
+	ctx, done, err := client.WithLease(ctx)
+	if err != nil {
+		return nil, err
+	}
+	defer done(ctx)
+
+	cs := client.ContentStore()
+	is := client.ImageService()
+
+	recordInImg, err := is.Get(ctx, recordInRef)
+	if err != nil {
+		return nil, err
+	}
+	targetImg, err := is.Get(ctx, targetRef)
+	if err != nil {
+		return nil, err
+	}
+
+	recordInManifestDescs, err := containerdutil.ManifestDescs(ctx, cs, recordInImg.Target, platform)
+	if err != nil {
+		return nil, err
+	}
+	// The first manifest is used as the fallback source below, so there must
+	// be at least one.
+	if len(recordInManifestDescs) == 0 {
+		return nil, fmt.Errorf("no manifest found in %q", recordInRef)
+	}
+	targetManifestDescs, err := containerdutil.ManifestDescs(ctx, cs, targetImg.Target, platform)
+	if err != nil {
+		return nil, err
+	}
+
+	records := make(map[digest.Digest]digest.Digest)
+	var recordsMu sync.Mutex
+	eg, egCtx := errgroup.WithContext(ctx)
+	for _, targetDesc := range targetManifestDescs {
+		targetDesc := targetDesc
+		eg.Go(func() error {
+			// Prefer a source manifest whose platform matches the target's
+			// and whose blob is present in the content store; otherwise fall
+			// back to the first source manifest.
+			recordInDesc := recordInManifestDescs[0]
+			if targetDesc.Platform != nil {
+				for _, inDesc := range recordInManifestDescs {
+					if inDesc.Platform == nil || platforms.Only(*targetDesc.Platform).Match(*inDesc.Platform) {
+						if _, err := cs.Info(egCtx, inDesc.Digest); err == nil {
+							recordInDesc = inDesc
+						}
+					}
+				}
+			}
+			p, err := content.ReadBlob(egCtx, cs, targetDesc)
+			if err != nil {
+				return err
+			}
+			var manifest ocispec.Manifest
+			if err := json.Unmarshal(p, &manifest); err != nil {
+				return err
+			}
+			rec, err := imageRecorderFromManifest(egCtx, cs, targetDesc, manifest)
+			if err != nil {
+				return err
+			}
+			// The converter is used only to walk the layers of the source
+			// image; scan never returns a converted descriptor.
+			scanner := prioritizedFilesScanner{r: rec}
+			if _, err := converter.DefaultIndexConvertFunc(scanner.scan, false, platforms.All)(egCtx, cs, recordInDesc); err != nil {
+				return err
+			}
+			d, err := scanner.r.Commit(egCtx)
+			if err != nil {
+				return err
+			}
+			recordsMu.Lock()
+			records[targetDesc.Digest] = d
+			recordsMu.Unlock()
+			return rec.Close()
+		})
+	}
+	if err := eg.Wait(); err != nil {
+		return nil, err
+	}
+	return recordsToPaths(ctx, cs, records)
+}
+
+// recordsToPaths converts records (target manifest digest -> record blob
+// digest) into a map from layer digest to prioritized paths. Layers without
+// any prioritized path are left out of the result.
+func recordsToPaths(ctx context.Context, cs content.Store, records map[digest.Digest]digest.Digest) (map[digest.Digest][]string, error) {
+	pathsPerLayer := make(map[digest.Digest][]string)
+	for targetDgst, recordOut := range records {
+		pFilesPerLayer, err := PrioritizedFilesFromRecord(ctx, cs, targetDgst, recordOut)
+		if err != nil {
+			return nil, err
+		}
+		for layerDgst, files := range pFilesPerLayer {
+			if len(files) == 0 {
+				continue
+			}
+			pathsPerLayer[layerDgst] = files
+		}
+	}
+	return pathsPerLayer, nil
+}
+
+// prioritizedFilesScanner feeds the prioritized files found in eStargz layers
+// into an ImageRecorder.
+type prioritizedFilesScanner struct {
+	r *ImageRecorder
+}
+
+// scan has the signature of a layer convert function but never converts: it
+// always returns (nil, nil). If desc can be opened as an eStargz (or
+// zstd:chunked) blob that carries a prefetch landmark, the name of every
+// non-empty entry located at or before the landmark is passed to the recorder.
+// Blobs that cannot be read or opened are skipped silently, and failures to
+// record a single name are only logged at debug level.
+func (s *prioritizedFilesScanner) scan(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) {
+	ra, err := cs.ReaderAt(ctx, desc)
+	if err != nil {
+		return nil, nil
+	}
+	defer ra.Close()
+	r, err := estargz.Open(io.NewSectionReader(ra, 0, desc.Size), estargz.WithDecompressors(new(zstdchunked.Decompressor)))
+	if err != nil {
+		return nil, nil
+	}
+	if _, ok := r.Lookup(estargz.NoPrefetchLandmark); ok {
+		return nil, nil // no prioritized files
+	}
+	landmark, ok := r.Lookup(estargz.PrefetchLandmark)
+	if !ok || landmark.Offset <= 0 {
+		return nil, nil // no prioritized files
+	}
+	offset := landmark.Offset
+	r.ForEachEntry(func(e *estargz.TOCEntry) bool {
+		if e.Offset <= offset && e.Size > 0 {
+			if err := s.r.Record(e.Name); err != nil {
+				logrus.Debugf("failed to record %q: %v", e.Name, err)
+			}
+		}
+		return true
+	})
+	return nil, nil
+}
+
+// PrioritizedFilesFromPaths records the given paths against every manifest of
+// the image targetRef selected by platform and returns, per layer digest, the
+// paths that were recorded for that layer. Paths that cannot be recorded are
+// logged at debug level and skipped; layers without any recorded path are
+// omitted from the result.
+func PrioritizedFilesFromPaths(ctx context.Context, client *containerd.Client, paths []string, targetRef string, platform platforms.MatchComparer) (map[digest.Digest][]string, error) {
+	cs := client.ContentStore()
+	is := client.ImageService()
+
+	targetImg, err := is.Get(ctx, targetRef)
+	if err != nil {
+		return nil, err
+	}
+	targetManifestDescs, err := containerdutil.ManifestDescs(ctx, cs, targetImg.Target, platform)
+	if err != nil {
+		return nil, err
+	}
+	records := make(map[digest.Digest]digest.Digest)
+	var recordsMu sync.Mutex
+	eg, egCtx := errgroup.WithContext(ctx)
+	for _, targetDesc := range targetManifestDescs {
+		targetDesc := targetDesc
+		eg.Go(func() error {
+			mb, err := content.ReadBlob(egCtx, cs, targetDesc)
+			if err != nil {
+				return err
+			}
+			var manifest ocispec.Manifest
+			if err := json.Unmarshal(mb, &manifest); err != nil {
+				return err
+			}
+			rec, err := imageRecorderFromManifest(egCtx, cs, targetDesc, manifest)
+			if err != nil {
+				return err
+			}
+			for _, path := range paths {
+				if err := rec.Record(path); err != nil {
+					logrus.Debugf("failed to record %q: %v", path, err)
+				}
+			}
+			d, err := rec.Commit(egCtx)
+			if err != nil {
+				return err
+			}
+			recordsMu.Lock()
+			records[targetDesc.Digest] = d
+			recordsMu.Unlock()
+			return rec.Close()
+		})
+	}
+	if err := eg.Wait(); err != nil {
+		return nil, err
+	}
+	return recordsToPaths(ctx, cs, records)
+}
+
+// PrioritizedFilesFromRecord decodes the record blob recordOutDgst (a stream of
+// JSON-encoded recorder.Entry values) and groups the recorded paths by the
+// layers of the manifest manifestDgst.
+//
+// The result contains one entry per layer of the manifest (possibly empty).
+// Paths are kept in the order they were recorded and each path appears at most
+// once per layer. Entries that belong to a different manifest, or whose layer
+// index is missing or out of range, are skipped.
+func PrioritizedFilesFromRecord(ctx context.Context, cs content.Store, manifestDgst, recordOutDgst digest.Digest) (map[digest.Digest][]string, error) {
+	ra, err := cs.ReaderAt(ctx, ocispec.Descriptor{Digest: recordOutDgst})
+	if err != nil {
+		return nil, err
+	}
+	defer ra.Close()
+	recordOutR := io.NewSectionReader(ra, 0, ra.Size())
+	mb, err := content.ReadBlob(ctx, cs, ocispec.Descriptor{Digest: manifestDgst})
+	if err != nil {
+		return nil, err
+	}
+	var manifest ocispec.Manifest
+	if err := json.Unmarshal(mb, &manifest); err != nil {
+		return nil, err
+	}
+	dec := json.NewDecoder(recordOutR)
+	pathsPerLayer := make(map[digest.Digest][]string, len(manifest.Layers))
+	for _, layerDesc := range manifest.Layers {
+		pathsPerLayer[layerDesc.Digest] = make([]string, 0)
+	}
+	added := make(map[digest.Digest]map[string]struct{}, len(manifest.Layers))
+	for dec.More() {
+		var e recorder.Entry
+		if err := dec.Decode(&e); err != nil {
+			return nil, err
+		}
+		// LayerIndex is a pointer and is used to index manifest.Layers, so it
+		// must be present and within [0, len(manifest.Layers)).
+		if e.LayerIndex == nil || *e.LayerIndex < 0 || *e.LayerIndex >= len(manifest.Layers) || e.ManifestDigest != manifestDgst.String() {
+			continue
+		}
+		dgst := manifest.Layers[*e.LayerIndex].Digest
+		if _, ok := pathsPerLayer[dgst]; !ok {
+			continue
+		}
+		if added[dgst] == nil {
+			added[dgst] = map[string]struct{}{}
+		}
+		if _, ok := added[dgst][e.Path]; ok {
+			continue
+		}
+		added[dgst][e.Path] = struct{}{}
+		pathsPerLayer[dgst] = append(pathsPerLayer[dgst], e.Path)
+	}
+
+	return pathsPerLayer, nil
+}
diff --git a/cmd/ctr-remote/commands/convert.go b/cmd/ctr-remote/commands/convert.go
index 4c67738d5..d54c73708 100644
--- a/cmd/ctr-remote/commands/convert.go
+++ b/cmd/ctr-remote/commands/convert.go
@@ -18,19 +18,25 @@ package commands
 
 import (
 	"compress/gzip"
+	gocontext "context"
 	"encoding/json"
 	"errors"
 	"fmt"
+	"io"
 	"os"
 
+	"github.com/containerd/containerd"
 	"github.com/containerd/containerd/cmd/ctr/commands"
+	"github.com/containerd/containerd/content"
 	"github.com/containerd/containerd/images/converter"
 	"github.com/containerd/containerd/images/converter/uncompress"
 	"github.com/containerd/containerd/platforms"
+	imgrecorder "github.com/containerd/stargz-snapshotter/analyzer/recorder"
 	"github.com/containerd/stargz-snapshotter/estargz"
 	estargzconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz"
 	zstdchunkedconvert
"github.com/containerd/stargz-snapshotter/nativeconverter/zstdchunked" "github.com/containerd/stargz-snapshotter/recorder" + "github.com/opencontainers/go-digest" ocispec "github.com/opencontainers/image-spec/specs-go/v1" "github.com/sirupsen/logrus" "github.com/urfave/cli" @@ -58,6 +64,14 @@ When '--all-platforms' is given all images in a manifest list must be available. Name: "estargz-record-in", Usage: "Read 'ctr-remote optimize --record-out=' record file", }, + cli.StringFlag{ + Name: "estargz-record-in-ref", + Usage: "Read record file distributed as an image", + }, + cli.StringFlag{ + Name: "estargz-record-copy", + Usage: "Copy record of prioritized files from existing eStargz image", + }, cli.IntFlag{ Name: "estargz-compression-level", Usage: "eStargz compression level", @@ -103,6 +117,18 @@ When '--all-platforms' is given all images in a manifest list must be available. return errors.New("src and target image need to be specified") } + client, ctx, cancel, err := commands.NewClient(context) + if err != nil { + return err + } + defer cancel() + + ctx, done, err := client.WithLease(ctx) + if err != nil { + return err + } + defer done(ctx) + var platformMC platforms.MatchComparer if context.Bool("all-platforms") { platformMC = platforms.All @@ -125,11 +151,11 @@ When '--all-platforms' is given all images in a manifest list must be available. var layerConvertFunc converter.ConvertFunc if context.Bool("estargz") { - esgzOpts, err := getESGZConvertOpts(context) + esgzOpts, esgzOptsPerLayer, err := getESGZConvertOpts(ctx, context, client, srcRef, platformMC) if err != nil { return err } - layerConvertFunc = estargzconvert.LayerConvertFunc(esgzOpts...) + layerConvertFunc = estargzconvert.LayerConvertWithLayerAndCommonOptsFunc(esgzOptsPerLayer, esgzOpts...) if !context.Bool("oci") { logrus.Warn("option --estargz should be used in conjunction with --oci") } @@ -142,11 +168,11 @@ When '--all-platforms' is given all images in a manifest list must be available. 
} if context.Bool("zstdchunked") { - esgzOpts, err := getESGZConvertOpts(context) + esgzOpts, esgzOptsPerLayer, err := getESGZConvertOpts(ctx, context, client, srcRef, platformMC) if err != nil { return err } - layerConvertFunc = zstdchunkedconvert.LayerConvertFunc(esgzOpts...) + layerConvertFunc = zstdchunkedconvert.LayerConvertWithLayerAndCommonOptsFunc(esgzOptsPerLayer, esgzOpts...) if !context.Bool("oci") { return errors.New("option --zstdchunked must be used in conjunction with --oci") } @@ -162,18 +188,15 @@ When '--all-platforms' is given all images in a manifest list must be available. if layerConvertFunc == nil { return errors.New("specify layer converter") } - convertOpts = append(convertOpts, converter.WithLayerConvertFunc(layerConvertFunc)) + convertOpts = append(convertOpts, converter.WithLayerConvertFunc(func(ctx gocontext.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) { + logrus.Infof("converting blob %q", desc.Digest) + return layerConvertFunc(ctx, cs, desc) + })) if context.Bool("oci") { convertOpts = append(convertOpts, converter.WithDockerToOCI(true)) } - client, ctx, cancel, err := commands.NewClient(context) - if err != nil { - return err - } - defer cancel() - newImg, err := converter.Convert(ctx, client, targetRef, srcRef, convertOpts...) if err != nil { return err @@ -183,29 +206,81 @@ When '--all-platforms' is given all images in a manifest list must be available. 
}, } -func getESGZConvertOpts(context *cli.Context) ([]estargz.Option, error) { +func getESGZConvertOpts(ctx gocontext.Context, context *cli.Context, client *containerd.Client, ref string, p platforms.MatchComparer) ([]estargz.Option, map[digest.Digest][]estargz.Option, error) { esgzOpts := []estargz.Option{ estargz.WithCompressionLevel(context.Int("estargz-compression-level")), estargz.WithChunkSize(context.Int("estargz-chunk-size")), } + var esgzOptsPerLayer map[digest.Digest][]estargz.Option if estargzRecordIn := context.String("estargz-record-in"); estargzRecordIn != "" { - paths, err := readPathsFromRecordFile(estargzRecordIn) + for _, key := range []string{"estargz-record-in-ref", "estargz-record-copy"} { + if in := context.String(key); in != "" { + return nil, nil, fmt.Errorf("\"estargz-record-in\" must not used with %q", key) + } + } + var err error + esgzOptsPerLayer, err = recordInFromFile(ctx, client, estargzRecordIn, ref, p) if err != nil { - return nil, err + return nil, nil, err + } + var ignored []string + esgzOpts = append(esgzOpts, estargz.WithAllowPrioritizeNotFound(&ignored)) + } + if estargzRecordInRef := context.String("estargz-record-in-ref"); estargzRecordInRef != "" { + for _, key := range []string{"estargz-record-in", "estargz-record-copy"} { + if in := context.String(key); in != "" { + return nil, nil, fmt.Errorf("\"estargz-record-in-ref\" must not used with %q", key) + } + } + var err error + esgzOptsPerLayer, err = recordInFromImage(ctx, client, estargzRecordInRef, ref, p) + if err != nil { + return nil, nil, err } - esgzOpts = append(esgzOpts, estargz.WithPrioritizedFiles(paths)) var ignored []string esgzOpts = append(esgzOpts, estargz.WithAllowPrioritizeNotFound(&ignored)) } - return esgzOpts, nil + if estargzRecordCopyRef := context.String("estargz-record-copy"); estargzRecordCopyRef != "" { + for _, key := range []string{"estargz-record-in", "estargz-record-in-ref"} { + if in := context.String(key); in != "" { + return nil, nil, 
fmt.Errorf("\"estargz-record-copy\" must not used with %q", key) + } + } + var err error + esgzOptsPerLayer, err = copyRecordFromImage(ctx, client, estargzRecordCopyRef, ref, p) + if err != nil { + return nil, nil, err + } + var ignored []string + esgzOpts = append(esgzOpts, estargz.WithAllowPrioritizeNotFound(&ignored)) + } + + return esgzOpts, esgzOptsPerLayer, nil } -func readPathsFromRecordFile(filename string) ([]string, error) { +func recordInFromFile(ctx gocontext.Context, client *containerd.Client, filename, targetImgRef string, platform platforms.MatchComparer) (map[digest.Digest][]estargz.Option, error) { r, err := os.Open(filename) if err != nil { return nil, err } defer r.Close() + return recordInFromReader(ctx, client, r, targetImgRef, platform) +} + +func recordInFromImage(ctx gocontext.Context, client *containerd.Client, recordInImgRef, targetImgRef string, platform platforms.MatchComparer) (map[digest.Digest][]estargz.Option, error) { + recordInDgst, err := imgrecorder.RecordInFromImage(ctx, client, recordInImgRef, platform) + if err != nil { + return nil, err + } + ra, err := client.ContentStore().ReaderAt(ctx, ocispec.Descriptor{Digest: recordInDgst}) + if err != nil { + return nil, err + } + defer ra.Close() + return recordInFromReader(ctx, client, io.NewSectionReader(ra, 0, ra.Size()), targetImgRef, platform) +} + +func recordInFromReader(ctx gocontext.Context, client *containerd.Client, r io.Reader, targetImgRef string, platform platforms.MatchComparer) (map[digest.Digest][]estargz.Option, error) { dec := json.NewDecoder(r) var paths []string added := make(map[string]struct{}) @@ -219,5 +294,26 @@ func readPathsFromRecordFile(filename string) ([]string, error) { added[e.Path] = struct{}{} } } - return paths, nil + logrus.Infof("analyzing blobs of %q", targetImgRef) + recordOuts, err := imgrecorder.PrioritizedFilesFromPaths(ctx, client, paths, targetImgRef, platform) + if err != nil { + return nil, err + } + return pathsToOptions(recordOuts), 
nil +} + +func copyRecordFromImage(ctx gocontext.Context, client *containerd.Client, recordInImgRef, targetImgRef string, p platforms.MatchComparer) (map[digest.Digest][]estargz.Option, error) { + recordOuts, err := imgrecorder.CopyRecordFromImage(ctx, client, recordInImgRef, targetImgRef, p) + if err != nil { + return nil, err + } + return pathsToOptions(recordOuts), nil +} + +func pathsToOptions(paths map[digest.Digest][]string) map[digest.Digest][]estargz.Option { + layerOpts := make(map[digest.Digest][]estargz.Option) + for layerDgst, o := range paths { + layerOpts[layerDgst] = []estargz.Option{estargz.WithPrioritizedFiles(o)} + } + return layerOpts } diff --git a/cmd/ctr-remote/commands/optimize.go b/cmd/ctr-remote/commands/optimize.go index 2509ce341..866986383 100644 --- a/cmd/ctr-remote/commands/optimize.go +++ b/cmd/ctr-remote/commands/optimize.go @@ -32,11 +32,11 @@ import ( "github.com/containerd/containerd/images/converter" "github.com/containerd/containerd/platforms" "github.com/containerd/stargz-snapshotter/analyzer" + imgrecorder "github.com/containerd/stargz-snapshotter/analyzer/recorder" "github.com/containerd/stargz-snapshotter/estargz" "github.com/containerd/stargz-snapshotter/estargz/zstdchunked" estargzconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz" zstdchunkedconvert "github.com/containerd/stargz-snapshotter/nativeconverter/zstdchunked" - "github.com/containerd/stargz-snapshotter/recorder" "github.com/containerd/stargz-snapshotter/util/containerdutil" "github.com/opencontainers/go-digest" ocispec "github.com/opencontainers/image-spec/specs-go/v1" @@ -77,6 +77,10 @@ var OptimizeCommand = cli.Command{ Name: "record-out", Usage: "record the monitor log to the specified file", }, + cli.StringFlag{ + Name: "record-out-ref", + Usage: "record the monitor log as the specified image", + }, cli.BoolFlag{ Name: "oci", Usage: "convert Docker media types to OCI media types", @@ -139,13 +143,18 @@ var OptimizeCommand = 
cli.Command{ recordOut, esgzOptsPerLayer, wrapper, err := analyze(ctx, clicontext, client, srcRef) if err != nil { - return err + return fmt.Errorf("failed to analyze: %w", err) } if recordOutFile := clicontext.String("record-out"); recordOutFile != "" { if err := writeContentFile(ctx, client, recordOut, recordOutFile); err != nil { return fmt.Errorf("failed output record file: %w", err) } } + if recordOutRef := clicontext.String("record-out-ref"); recordOutRef != "" { + if _, err := imgrecorder.RecordOutToImage(ctx, client, recordOut, recordOutRef); err != nil { + return fmt.Errorf("failed output record ref: %w", err) + } + } var f converter.ConvertFunc if clicontext.Bool("zstdchunked") { f = zstdchunkedconvert.LayerConvertWithLayerOptsFunc(esgzOptsPerLayer) @@ -160,7 +169,7 @@ var OptimizeCommand = cli.Command{ convertOpts = append(convertOpts, converter.WithLayerConvertFunc(layerConvertFunc)) newImg, err := converter.Convert(ctx, client, targetRef, srcRef, convertOpts...) if err != nil { - return err + return fmt.Errorf("failed to convert %w", err) } fmt.Fprintln(clicontext.App.Writer, newImg.Target.Digest.String()) return nil @@ -231,53 +240,31 @@ func analyze(ctx context.Context, clicontext *cli.Context, client *containerd.Cl } recordOut, err := analyzer.Analyze(ctx, client, srcRef, aOpts...) 
if err != nil { - return "", nil, nil, err + return "", nil, nil, fmt.Errorf("failed to analyze: %w", err) } // Parse record file srcImg, err := is.Get(ctx, srcRef) if err != nil { - return "", nil, nil, err + return "", nil, nil, fmt.Errorf("failed to get image: %w", err) } manifestDesc, err := containerdutil.ManifestDesc(ctx, cs, srcImg.Target, platforms.DefaultStrict()) if err != nil { - return "", nil, nil, err + return "", nil, nil, fmt.Errorf("failed to get manifest: %w", err) } p, err := content.ReadBlob(ctx, cs, manifestDesc) if err != nil { - return "", nil, nil, err + return "", nil, nil, fmt.Errorf("failed to read manifest: %w", err) } var manifest ocispec.Manifest if err := json.Unmarshal(p, &manifest); err != nil { - return "", nil, nil, err + return "", nil, nil, fmt.Errorf("failed to unmarshal manifest: %w", err) } // TODO: this should be indexed by layer "index" (not "digest") - layerLogs := make(map[digest.Digest][]string, len(manifest.Layers)) - ra, err := cs.ReaderAt(ctx, ocispec.Descriptor{Digest: recordOut}) + layerLogs, err := imgrecorder.PrioritizedFilesFromRecord(ctx, cs, manifestDesc.Digest, recordOut) if err != nil { - return "", nil, nil, err + return "", nil, nil, fmt.Errorf("failed to get prioritized files from record: %w", err) } - defer ra.Close() - dec := json.NewDecoder(io.NewSectionReader(ra, 0, ra.Size())) - added := make(map[digest.Digest]map[string]struct{}, len(manifest.Layers)) - for dec.More() { - var e recorder.Entry - if err := dec.Decode(&e); err != nil { - return "", nil, nil, err - } - if *e.LayerIndex < len(manifest.Layers) && - e.ManifestDigest == manifestDesc.Digest.String() { - dgst := manifest.Layers[*e.LayerIndex].Digest - if added[dgst] == nil { - added[dgst] = map[string]struct{}{} - } - if _, ok := added[dgst][e.Path]; !ok { - added[dgst][e.Path] = struct{}{} - layerLogs[dgst] = append(layerLogs[dgst], e.Path) - } - } - } - // Create a converter wrapper for skipping layer conversion. 
This skip occurs // if "reuse" option is specified, the source layer is already valid estargz // and no access occur to that layer. diff --git a/docs/ctr-remote.md b/docs/ctr-remote.md index 0e6520e56..cde470aa0 100644 --- a/docs/ctr-remote.md +++ b/docs/ctr-remote.md @@ -266,3 +266,178 @@ ctr-remote image optimize --oci \ By default, when the source image is a multi-platform image, `ctr-remote` converts the image corresponding to the platform where `ctr-remote` runs. Note that though the images specified by `--all-platform` and `--platform` are converted to eStargz, images that don't correspond to the current platform aren't *optimized*. That is, these images are lazily pulled but without prefetch. + +### Static optimization by sharing prioritized files information + +If you don't want to run the profiler for optimizing an eStargz image as mentioned above, you can take either of the following approaches using pre-generated prioritized files information. + +- Copying prioritized files information from an existing eStargz image +- Acquiring prioritized files information shared as a file +- Acquiring prioritized files information shared via registry + +#### Copying prioritized files information from an existing eStargz image + +`ctr-remote image convert` command provides `--estargz-record-copy` option that allows you to specify an existing eStargz image. +When this option is used, ctr-remote analyzes the specified eStargz image, extracts prioritized files information, then applies it to the newly created eStargz image. + +For example, when you optimize an eStargz image using the profiler as mentioned above, + +``` +ctr-remote image optimize --oci \ + --entrypoint='[ "/bin/bash", "-c" ]' --args='[ "go version" ]' \ + ghcr.io/stargz-containers/golang:1.15.3-buster-org \ + registry2:5000/golang:1.15.3-esgz-go-version +``` + +You get the optimized eStargz `registry2:5000/golang:1.15.3-esgz-go-version`. 
+ +Now you can create a new eStargz image with the same optimization as `registry2:5000/golang:1.15.3-esgz-go-version` using `--estargz-record-copy` without running the profiler. + +``` +ctr-remote image convert --oci --estargz \ + --estargz-record-copy registry2:5000/golang:1.15.3-esgz-go-version \ + ghcr.io/stargz-containers/golang:1.15.3-buster-org \ + registry2:5000/golang:1.15.3-esgz-go-version-static +``` + +Here, `registry2:5000/golang:1.15.3-esgz-go-version-static` has the same digest as `registry2:5000/golang:1.15.3-esgz-go-version` because they have the same prioritized files information. + +> It's allowed that the eStargz image specified by `--estargz-record-copy` is completely different from the image being converted (e.g. optimizing `python:3.9` image by copying prioritized files information from `golang:1.15.3-esgz-go-version`). +> In this case, prioritized files information is applied to the newly created eStargz in a best-effort manner. +> Therefore, files contained in both the image specified by `--estargz-record-copy` and the newly created one are treated as prioritized files. + +#### Sharing prioritized files information using record files + +When optimizing an eStargz image, you can use the pre-generated *record file* which records the prioritized files of the image for a specific workload. + +A record file takes either of the following forms. + +- File : JSON file which records prioritized files for a specific workload +- OCI image : An OCI-compliant image that contains a record file. It can be shared among hosts/users via container registry. + +As shown in the following, `ctr-remote image optimize` command with `--record-out` option creates an optimized eStargz and generates a record file which contains the prioritized files information.
+ +``` +ctr-remote image optimize --oci --record-out=/tmp/record.json \ + --entrypoint='[ "/bin/bash", "-c" ]' --args='[ "go version" ]' \ + ghcr.io/stargz-containers/golang:1.15.3-buster-org \ + registry2:5000/golang:1.15.3-esgz-go-version +``` + +The generated record file `/tmp/record.json` contains a list of prioritized files like the following. +Please see [`recorder`](https://github.com/containerd/stargz-snapshotter/blob/main/recorder/recorder.go) package in this repo for the detailed format of the JSON. + +```json +{"path":"bin/bash","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"bin/bash","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/ld-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/ld.so.cache","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":4} +{"path":"lib/x86_64-linux-gnu/libtinfo.so.6.1","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/libdl-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/libc-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/nsswitch.conf","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/ld.so.cache","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":4} +{"path":"lib/x86_64-linux-gnu/libnss_files-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} 
+{"path":"etc/passwd","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"usr/local/go/bin/go","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":5} +{"path":"lib/x86_64-linux-gnu/ld-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/ld.so.cache","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":4} +{"path":"lib/x86_64-linux-gnu/libpthread-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/libc-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +``` + +The generated record file can be used for optimizing other images without running the profiler. +`ctr-remote image convert` command provides `--estargz-record-in` option that allows you to specify the record file. +The prioritized files information described in the record file is applied to the newly-created eStargz. + +``` +ctr-remote image convert --oci --estargz \ + --estargz-record-in /tmp/record.json \ + ghcr.io/stargz-containers/golang:1.15.3-buster-org \ + registry2:5000/golang:1.15.3-esgz-go-version-static +``` + +Here, `registry2:5000/golang:1.15.3-esgz-go-version-static` has the same digest as `registry2:5000/golang:1.15.3-esgz-go-version` because they have the same prioritized files information. + +> Prioritized files information described in the record file is applied to the newly created eStargz in a best-effort manner. +> Therefore, files contained in both the record file and the newly created eStargz are treated as prioritized files. + +#### Sharing prioritized files information via container registry + +You can share the record file mentioned above via container registry.
+`ctr-remote image optimize` command provides `--record-out-ref` option which packages the record file as an OCI image. +The created record file image can be pushed to the container registry and shared among hosts/users via registry. + +``` +ctr-remote image optimize --oci --record-out-ref=registry2:5000/golang:record-1.15.3-esgz-go-version \ + --entrypoint='[ "/bin/bash", "-c" ]' --args='[ "go version" ]' \ + ghcr.io/stargz-containers/golang:1.15.3-buster-org \ + registry2:5000/golang:1.15.3-esgz-go-version +``` + +Here, `registry2:5000/golang:record-1.15.3-esgz-go-version` is the record file packaged as an OCI image. + +`ctr-remote image convert` command provides `--estargz-record-in-ref` option that allows you to use the record file image for optimizing a new eStargz image. +The prioritized files information described in the record file is applied to the newly-created eStargz. + +``` +ctr-remote image convert --oci --estargz \ + --estargz-record-in-ref registry2:5000/golang:record-1.15.3-esgz-go-version \ + ghcr.io/stargz-containers/golang:1.15.3-buster-org \ + registry2:5000/golang:1.15.3-esgz-go-version-static +``` + +Here, `registry2:5000/golang:1.15.3-esgz-go-version-static` has the same digest as `registry2:5000/golang:1.15.3-esgz-go-version` because they have the same prioritized files information. + +> Prioritized files information described in the record file image is applied to the newly created eStargz in a best-effort manner. +> Therefore, files contained in both the record file and the newly created eStargz are treated as prioritized files. + +##### Format of record file image + +The record file image described above is an OCI image that contains only one gzip-compressed layer, which contains the record file as a tar entry named `stargz.record.json`. + +The following is the manifest of the record file image `registry2:5000/golang:record-1.15.3-esgz-go-version` mentioned above.
+ +```json +{ + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "config": { + "mediaType": "application/vnd.oci.image.config.v1+json", + "digest": "sha256:518c804adcec04f5148877af99ec6d216eb50fc47b0b1430217163b838ef7f15", + "size": 164 + }, + "layers": [ + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": "sha256:5b37372a28490bb0737aaadb48241b72ca6e54e023b15ac481a5a3df0da69dcc", + "size": 344 + } + ] +} +``` + +Layer blob `sha256:5b37372a28490bb0737aaadb48241b72ca6e54e023b15ac481a5a3df0da69dcc` is a tar.gz blob that contains the record file as a tar entry named `stargz.record.json`. + +``` +# cat /tmp/img/blobs/sha256/5b37372a28490bb0737aaadb48241b72ca6e54e023b15ac481a5a3df0da69dcc | tar -zxO stargz.record.json +{"path":"bin/bash","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"bin/bash","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/ld-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/ld.so.cache","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":4} +{"path":"lib/x86_64-linux-gnu/libtinfo.so.6.1","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/libdl-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/libc-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/nsswitch.conf","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} 
+{"path":"etc/nsswitch.conf","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/ld.so.cache","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":4} +{"path":"lib/x86_64-linux-gnu/libnss_files-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/libnss_files-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/passwd","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"usr/local/go/bin/go","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":5} +{"path":"usr/local/go/bin/go","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":5} +{"path":"lib/x86_64-linux-gnu/ld-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"etc/ld.so.cache","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":4} +{"path":"lib/x86_64-linux-gnu/libpthread-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +{"path":"lib/x86_64-linux-gnu/libc-2.28.so","manifestDigest":"sha256:2322be6aa1e61c36cfa34a0d355b7f088022e34c6f17016bafae6041c67a0a1b","layerIndex":0} +``` diff --git a/estargz/estargz.go b/estargz/estargz.go index 4b655c145..2a434f491 100644 --- a/estargz/estargz.go +++ b/estargz/estargz.go @@ -485,6 +485,15 @@ func (r *Reader) Lookup(path string) (e *TOCEntry, ok bool) { return } +// ForEachEntry calls the callback func for each TOCEntry. 
+func (r *Reader) ForEachEntry(f func(*TOCEntry) bool) { + for _, e := range r.toc.Entries { + if !f(e) { + return + } + } +} + // OpenFile returns the reader of the specified file payload. // // Name must be absolute path or one that is relative to root. diff --git a/nativeconverter/estargz/estargz.go b/nativeconverter/estargz/estargz.go index e09fd69d2..7fd186d8a 100644 --- a/nativeconverter/estargz/estargz.go +++ b/nativeconverter/estargz/estargz.go @@ -39,13 +39,15 @@ import ( // LayerConvertFunc for more details. The difference between this function and // LayerConvertFunc is that this allows to specify additional eStargz options per layer. func LayerConvertWithLayerAndCommonOptsFunc(opts map[digest.Digest][]estargz.Option, commonOpts ...estargz.Option) converter.ConvertFunc { + var cOpts []estargz.Option + cOpts = append(cOpts, commonOpts...) if opts == nil { - return LayerConvertFunc(commonOpts...) + return LayerConvertFunc(cOpts...) } return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) { // TODO: enable to speciy option per layer "index" because it's possible that there are // two layers having same digest in an image (but this should be rare case) - return LayerConvertFunc(append(commonOpts, opts[desc.Digest]...)...)(ctx, cs, desc) + return LayerConvertFunc(append(cOpts, opts[desc.Digest]...)...)(ctx, cs, desc) } } @@ -56,8 +58,10 @@ func LayerConvertWithLayerAndCommonOptsFunc(opts map[digest.Digest][]estargz.Opt // // Otherwise "containerd.io/snapshot/stargz/toc.digest" annotation will be lost, // because the Docker media type does not support layer annotations. 
-func LayerConvertFunc(opts ...estargz.Option) converter.ConvertFunc { +func LayerConvertFunc(esgzOpts ...estargz.Option) converter.ConvertFunc { return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) { + var opts []estargz.Option // Copy the passed options slice and avoid directly modifying it + opts = append(opts, esgzOpts...) if !images.IsLayerType(desc.MediaType) { // No conversion. No need to return an error here. return nil, nil diff --git a/nativeconverter/zstdchunked/zstdchunked.go b/nativeconverter/zstdchunked/zstdchunked.go index f9ca3b18f..958c0e359 100644 --- a/nativeconverter/zstdchunked/zstdchunked.go +++ b/nativeconverter/zstdchunked/zstdchunked.go @@ -42,6 +42,23 @@ type zstdCompression struct { *zstdchunked.Compressor } +// LayerConvertWithLayerAndCommonOptsFunc converts legacy tar.gz layers into zstd:chunked +// layers. Media type is unchanged. Should be used in conjunction with WithDockerToOCI(). See +// LayerConvertFunc for more details. The difference between this function and +// LayerConvertFunc is that this allows to specify additional options per layer. +func LayerConvertWithLayerAndCommonOptsFunc(opts map[digest.Digest][]estargz.Option, commonOpts ...estargz.Option) converter.ConvertFunc { + var cOpts []estargz.Option + cOpts = append(cOpts, commonOpts...) + if opts == nil { + return LayerConvertFunc(cOpts...) + } + return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) { + // TODO: enable to speciy option per layer "index" because it's possible that there are + // two layers having same digest in an image (but this should be rare case) + return LayerConvertFunc(append(cOpts, opts[desc.Digest]...)...)(ctx, cs, desc) + } +} + // LayerConvertWithLayerOptsFunc converts legacy tar.gz layers into zstd:chunked layers. 
// // This changes Docker MediaType to OCI MediaType so this should be used in @@ -66,8 +83,10 @@ func LayerConvertWithLayerOptsFunc(opts map[digest.Digest][]estargz.Option) conv // // Otherwise "io.containers.zstd-chunked.manifest-checksum" annotation will be lost, // because the Docker media type does not support layer annotations. -func LayerConvertFunc(opts ...estargz.Option) converter.ConvertFunc { +func LayerConvertFunc(esgzOpts ...estargz.Option) converter.ConvertFunc { return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) { + var opts []estargz.Option // Copy the passed options slice and avoid directly modifying it + opts = append(opts, esgzOpts...) if !images.IsLayerType(desc.MediaType) { // No conversion. No need to return an error here. return nil, nil diff --git a/script/optimize/optimize/entrypoint.sh b/script/optimize/optimize/entrypoint.sh index 056cb0226..c83ba264a 100755 --- a/script/optimize/optimize/entrypoint.sh +++ b/script/optimize/optimize/entrypoint.sh @@ -26,6 +26,11 @@ TOC_JSON_DIGEST_ANNOTATION="containerd.io/snapshot/stargz/toc.digest" UNCOMPRESSED_SIZE_ANNOTATION="io.containers.estargz.uncompressed-size" REMOTE_SNAPSHOTTER_SOCKET=/run/containerd-stargz-grpc/containerd-stargz-grpc.sock +RECORD_IMAGE_TAG="${REGISTRY_HOST}/test/test:record$(date '+%M%S')" +RECORD_IN_OPT_IMAGE_TAG="${REGISTRY_HOST}/test/test:record-in-opt$(date '+%M%S')" +RECORD_IN_REF_OPT_IMAGE_TAG="${REGISTRY_HOST}/test/test:record-in-ref-opt$(date '+%M%S')" +RECORD_COPY_OPT_IMAGE_TAG="${REGISTRY_HOST}/test/test:record-copy-opt$(date '+%M%S')" + ## Image for doing network-related tests # # FROM ubuntu:20.04 @@ -205,62 +210,90 @@ echo "Checking optimized image..." 
WORKING_DIR=$(mktemp -d) PREFIX=/tmp/out/ make clean PREFIX=/tmp/out/ GO_BUILD_FLAGS="-race" make ctr-remote # Check data race -/tmp/out/ctr-remote ${OPTIMIZE_COMMAND} -entrypoint='[ "/accessor" ]' "${ORG_IMAGE_TAG}" "${OPT_IMAGE_TAG}" +/tmp/out/ctr-remote ${OPTIMIZE_COMMAND} --record-out="${WORKING_DIR}/record.json" --record-out-ref="${RECORD_IMAGE_TAG}" -entrypoint='[ "/accessor" ]' "${ORG_IMAGE_TAG}" "${OPT_IMAGE_TAG}" nerdctl push "${OPT_IMAGE_TAG}" || true -cat < "${WORKING_DIR}/0-want" +nerdctl push "${RECORD_IMAGE_TAG}" || true +nerdctl rmi "${RECORD_IMAGE_TAG}" || true +cat < "${WORKING_DIR}/0-opt-want" accessor a.txt .prefetch.landmark b.txt EOF -append_toc "${WORKING_DIR}/0-want" +append_toc "${WORKING_DIR}/0-opt-want" -cat < "${WORKING_DIR}/1-want" +cat < "${WORKING_DIR}/1-opt-want" c.txt .prefetch.landmark d.txt EOF -append_toc "${WORKING_DIR}/1-want" +append_toc "${WORKING_DIR}/1-opt-want" -cat < "${WORKING_DIR}/2-want" +cat < "${WORKING_DIR}/2-opt-want" .no.prefetch.landmark e.txt EOF -append_toc "${WORKING_DIR}/2-want" +append_toc "${WORKING_DIR}/2-opt-want" check_optimization "${OPT_IMAGE_TAG}" \ - "${WORKING_DIR}/0-want" \ - "${WORKING_DIR}/1-want" \ - "${WORKING_DIR}/2-want" + "${WORKING_DIR}/0-opt-want" \ + "${WORKING_DIR}/1-opt-want" \ + "${WORKING_DIR}/2-opt-want" + +echo "Testing estargz-record-in" +cat "${WORKING_DIR}/record.json" +/tmp/out/ctr-remote ${CONVERT_COMMAND} --estargz-record-in="${WORKING_DIR}/record.json" "${ORG_IMAGE_TAG}" "${RECORD_IN_OPT_IMAGE_TAG}" +nerdctl push "${RECORD_IN_OPT_IMAGE_TAG}" || true +check_optimization "${RECORD_IN_OPT_IMAGE_TAG}" \ + "${WORKING_DIR}/0-opt-want" \ + "${WORKING_DIR}/1-opt-want" \ + "${WORKING_DIR}/2-opt-want" + +echo "Testing estargz-record-in-ref" +nerdctl pull "${RECORD_IMAGE_TAG}" +/tmp/out/ctr-remote ${CONVERT_COMMAND} --estargz-record-in-ref="${RECORD_IMAGE_TAG}" "${ORG_IMAGE_TAG}" "${RECORD_IN_REF_OPT_IMAGE_TAG}" +nerdctl push "${RECORD_IN_REF_OPT_IMAGE_TAG}" || true +check_optimization 
"${RECORD_IN_REF_OPT_IMAGE_TAG}" \ + "${WORKING_DIR}/0-opt-want" \ + "${WORKING_DIR}/1-opt-want" \ + "${WORKING_DIR}/2-opt-want" + +echo "Testing estargz-record-copy" +/tmp/out/ctr-remote ${CONVERT_COMMAND} --estargz-record-copy="${OPT_IMAGE_TAG}" "${ORG_IMAGE_TAG}" "${RECORD_COPY_OPT_IMAGE_TAG}" +nerdctl push "${RECORD_COPY_OPT_IMAGE_TAG}" || true +check_optimization "${RECORD_COPY_OPT_IMAGE_TAG}" \ + "${WORKING_DIR}/0-opt-want" \ + "${WORKING_DIR}/1-opt-want" \ + "${WORKING_DIR}/2-opt-want" echo "Checking non-optimized image..." /tmp/out/ctr-remote ${NO_OPTIMIZE_COMMAND} "${ORG_IMAGE_TAG}" "${NOOPT_IMAGE_TAG}" nerdctl push "${NOOPT_IMAGE_TAG}" || true -cat < "${WORKING_DIR}/0-want" +cat < "${WORKING_DIR}/0-noopt-want" .no.prefetch.landmark a.txt accessor b.txt EOF -append_toc "${WORKING_DIR}/0-want" +append_toc "${WORKING_DIR}/0-noopt-want" -cat < "${WORKING_DIR}/1-want" +cat < "${WORKING_DIR}/1-noopt-want" .no.prefetch.landmark c.txt d.txt EOF -append_toc "${WORKING_DIR}/1-want" +append_toc "${WORKING_DIR}/1-noopt-want" -cat < "${WORKING_DIR}/2-want" +cat < "${WORKING_DIR}/2-noopt-want" .no.prefetch.landmark e.txt EOF -append_toc "${WORKING_DIR}/2-want" +append_toc "${WORKING_DIR}/2-noopt-want" check_optimization "${NOOPT_IMAGE_TAG}" \ - "${WORKING_DIR}/0-want" \ - "${WORKING_DIR}/1-want" \ - "${WORKING_DIR}/2-want" + "${WORKING_DIR}/0-noopt-want" \ + "${WORKING_DIR}/1-noopt-want" \ + "${WORKING_DIR}/2-noopt-want" # Test networking & mounting work diff --git a/script/optimize/test.sh b/script/optimize/test.sh index 76d3a5c7f..56413b1e3 100755 --- a/script/optimize/test.sh +++ b/script/optimize/test.sh @@ -103,9 +103,10 @@ echo "Testing..." 
function test_optimize { local OPTIMIZE_COMMAND="${1}" local NO_OPTIMIZE_COMMAND="${2}" - local GETTOCDIGEST_COMMAND="${3}" - local DECOMPRESS_COMMAND="${4}" - local INVISIBLE_TOC="${5}" + local CONVERT_COMMAND="${3}" + local GETTOCDIGEST_COMMAND="${4}" + local DECOMPRESS_COMMAND="${5}" + local INVISIBLE_TOC="${6}" cat < "${DOCKER_COMPOSE_YAML}" version: "3.3" services: @@ -119,6 +120,7 @@ services: - NO_PROXY=127.0.0.1,localhost,${REGISTRY_HOST}:443 - OPTIMIZE_COMMAND=${OPTIMIZE_COMMAND} - NO_OPTIMIZE_COMMAND=${NO_OPTIMIZE_COMMAND} + - CONVERT_COMMAND=${CONVERT_COMMAND} - GETTOCDIGEST_COMMAND=${GETTOCDIGEST_COMMAND} - DECOMPRESS_COMMAND=${DECOMPRESS_COMMAND} - INVISIBLE_TOC=${INVISIBLE_TOC} @@ -162,12 +164,14 @@ EOF test_optimize "image optimize --oci --zstdchunked" \ "image optimize --no-optimize --oci --zstdchunked" \ + "image convert --oci --zstdchunked" \ "image get-toc-digest --zstdchunked" \ "zstd -d" \ "true" test_optimize "image optimize --oci" \ "image optimize --no-optimize --oci" \ + "image convert --oci --estargz" \ "image get-toc-digest" \ "gunzip" \ "false" diff --git a/util/containerdutil/manifest.go b/util/containerdutil/manifest.go index b7adafca6..d3bde3d76 100644 --- a/util/containerdutil/manifest.go +++ b/util/containerdutil/manifest.go @@ -30,8 +30,15 @@ import ( ) func ManifestDesc(ctx context.Context, provider content.Provider, image ocispec.Descriptor, platform platforms.MatchComparer) (ocispec.Descriptor, error) { + m, err := ManifestDescs(ctx, provider, image, platform) + if err != nil { + return ocispec.Descriptor{}, err + } + return m[0], nil +} + +func ManifestDescs(ctx context.Context, provider content.Provider, image ocispec.Descriptor, platform platforms.MatchComparer) ([]ocispec.Descriptor, error) { var ( - limit = 1 m []ocispec.Descriptor wasIndex bool ) @@ -97,23 +104,20 @@ func ManifestDesc(ctx context.Context, provider content.Provider, image ocispec. 
return platform.Less(*descs[i].Platform, *descs[j].Platform) }) wasIndex = true - if len(descs) > limit { - return descs[:limit], nil - } return descs, nil } return nil, fmt.Errorf("unexpected media type %v for %v: %w", desc.MediaType, desc.Digest, errdefs.ErrNotFound) }), image); err != nil { - return ocispec.Descriptor{}, err + return nil, err } if len(m) == 0 { err := fmt.Errorf("manifest %v: %w", image.Digest, errdefs.ErrNotFound) if wasIndex { err = fmt.Errorf("no match for platform in manifest %v: %w", image.Digest, errdefs.ErrNotFound) } - return ocispec.Descriptor{}, err + return nil, err } - return m[0], nil + return m, nil } // Forked from github.com/containerd/containerd/images/image.go