From 2903cddb58f6ee99116e0751a2305f75f9a86461 Mon Sep 17 00:00:00 2001
From: Tianjie Xu
Date: Fri, 18 Aug 2017 18:15:47 -0700
Subject: Improve imgdiff for large zip files

Due to the cache size limit for OTA generation, we used to split large zip
files linearly into pieces and do bsdiff on them. As a result, i) we lose the
advantage of imgdiff; ii) if there's an accidental order change of some huge
files inside the zip, we'll create an insanely large patch.

This patch splits the src&tgt more smartly based on the zip entry_name. If the
entry_name is empty or no matching source is found for a target chunk, we'll
skip adding its source and later do a bsdiff against the whole split source
image (this rarely happens in our use cases except for the metadata inside a
ziparchive).

After the split, the target pieces are contiguous and block aligned, while the
source pieces are mutually exclusive. (Some of the source blocks may not be
used if there's no matching entry_name in the target.) Then we will generate
patches accordingly between each pair of split images. Afterwards, if we apply
imgpatch to each pair of split source/target images and add up the patched
results, we can get back the original target image.

For example:
Input:  [src_image, tgt_image]
Split:  [src-0,tgt-0; src-1,tgt-1; src-2,tgt-2]
Diff:   [patch-0; patch-1; patch-2]
Patch:  [(src-0,patch-0)=tgt-0; (src-1,patch-1)=tgt-1; (src-2,patch-2)=tgt-2]
Append: [tgt-0 + tgt-1 + tgt-2 = tgt_image]

Performance: For the small package in b/34220646, we decrease the patch size
of chrome.apk dramatically from 30M to 400K due to the order change of two big
.so files. On two versions of angler, I also observe a decent patch size
decrease. For chrome.apk, we reduced the size from 5.9M to 3.2M; and for
velvet.apk from 8.0M to 6.5M.

Bug: 34220646
Test: recovery component test && apply imgdiff & imgpatch on two chrome.apk
Change-Id: I145d802984fa805efbbac9d01a2e64d82ef9728b
---
 applypatch/imgdiff.cpp                        | 441 ++++++++++++++++++++++++--
 applypatch/include/applypatch/imgdiff_image.h |  58 +++-
 tests/component/imgdiff_test.cpp              | 339 +++++++++++++++++++-
 3 files changed, 814 insertions(+), 24 deletions(-)

diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp
index 59b600713..2eb618fbf 100644
--- a/applypatch/imgdiff.cpp
+++ b/applypatch/imgdiff.cpp
@@ -125,6 +125,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -139,16 +140,19 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include "applypatch/imgdiff_image.h"
+#include "rangeset.h"
 using android::base::get_unaligned;
-static constexpr auto BUFFER_SIZE = 0x8000;
+static constexpr size_t BLOCK_SIZE = 4096;
+static constexpr size_t BUFFER_SIZE = 0x8000;
 // If we use this function to write the offset and length (type size_t), their values should not
 // exceed 2^63; because the signed bit will be casted away.
@@ -162,6 +166,67 @@ static inline bool Write4(int fd, int32_t value) {
   return android::base::WriteFully(fd, &value, sizeof(int32_t));
 }
+// Trim the head or tail to align with the block size. Return false if the chunk has nothing left
+// after alignment.
+static bool AlignHead(size_t* start, size_t* length) {
+  size_t residual = (*start % BLOCK_SIZE == 0) ? 0 : BLOCK_SIZE - *start % BLOCK_SIZE;
+
+  if (*length <= residual) {
+    *length = 0;
+    return false;
+  }
+
+  // Trim the data in the beginning.
+ *start += residual; + *length -= residual; + return true; +} + +static bool AlignTail(size_t* start, size_t* length) { + size_t residual = (*start + *length) % BLOCK_SIZE; + if (*length <= residual) { + *length = 0; + return false; + } + + // Trim the data in the end. + *length -= residual; + return true; +} + +// Remove the used blocks from the source chunk to make sure the source ranges are mutually +// exclusive after split. Return false if we fail to get the non-overlapped ranges. In such +// a case, we'll skip the entire source chunk. +static bool RemoveUsedBlocks(size_t* start, size_t* length, const SortedRangeSet& used_ranges) { + if (!used_ranges.Overlaps(*start, *length)) { + return true; + } + + // TODO find the largest non-overlap chunk. + printf("Removing block %s from %zu - %zu\n", used_ranges.ToString().c_str(), *start, + *start + *length - 1); + + // If there's no duplicate entry name, we should only overlap in the head or tail block. Try to + // trim both blocks. Skip this source chunk in case it still overlaps with the used ranges. + if (AlignHead(start, length) && !used_ranges.Overlaps(*start, *length)) { + return true; + } + if (AlignTail(start, length) && !used_ranges.Overlaps(*start, *length)) { + return true; + } + + printf("Failed to remove the overlapped block ranges; skip the source\n"); + return false; +} + +static const struct option OPTIONS[] = { + { "zip-mode", no_argument, nullptr, 'z' }, + { "bonus-file", required_argument, nullptr, 'b' }, + { "block-limit", required_argument, nullptr, 0 }, + { "debug-dir", required_argument, nullptr, 0 }, + { nullptr, 0, nullptr, 0 }, +}; + ImageChunk::ImageChunk(int type, size_t start, const std::vector* file_content, size_t raw_data_len, std::string entry_name) : type_(type), @@ -371,6 +436,12 @@ bool PatchChunk::RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size) { return (tgt.GetType() == CHUNK_NORMAL && (target_len <= 160 || target_len < patch_size)); } +void PatchChunk::UpdateSourceOffset(const SortedRangeSet& src_range) { + if (type_ == CHUNK_DEFLATE) { + source_start_ = src_range.GetOffsetInRangeSet(source_start_); + } +} + // Header size: // header_type 4 bytes // CHUNK_NORMAL 8*3 = 24 bytes @@ -572,7 +643,7 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl ZipString name; ZipEntry entry; while ((ret = Next(cookie, &entry, &name)) == 0) { - if (entry.method == kCompressDeflated) { + if (entry.method == kCompressDeflated || limit_ > 0) { std::string entry_name(name.name, name.name + name.name_length); temp_entries.emplace_back(entry_name, entry); } @@ -595,6 +666,17 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl return false; } } + + // Add the end of zip file (mainly central directory) as a normal chunk. 
+ size_t entries_end = 0; + if (!temp_entries.empty()) { + entries_end = static_cast(temp_entries.back().second.offset + + temp_entries.back().second.compressed_length); + } + CHECK_LT(entries_end, file_content_.size()); + chunks_.emplace_back(CHUNK_NORMAL, entries_end, &file_content_, + file_content_.size() - entries_end); + return true; } @@ -635,7 +717,21 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl bool ZipModeImage::AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry) { size_t compressed_len = entry->compressed_length; - if (entry->method == kCompressDeflated) { + if (compressed_len == 0) return true; + + // Split the entry into several normal chunks if it's too large. + if (limit_ > 0 && compressed_len > limit_) { + int count = 0; + while (compressed_len > 0) { + size_t length = std::min(limit_, compressed_len); + std::string name = entry_name + "-" + std::to_string(count); + chunks_.emplace_back(CHUNK_NORMAL, entry->offset + limit_ * count, &file_content_, length, + name); + + count++; + compressed_len -= length; + } + } else if (entry->method == kCompressDeflated) { size_t uncompressed_len = entry->uncompressed_length; std::vector uncompressed_data(uncompressed_len); int ret = ExtractToMemory(handle, entry, uncompressed_data.data(), uncompressed_len); @@ -697,10 +793,23 @@ const ImageChunk* ZipModeImage::FindChunkByName(const std::string& name, bool fi return nullptr; } for (auto& chunk : chunks_) { - if ((chunk.GetType() == CHUNK_DEFLATE || find_normal) && chunk.GetEntryName() == name) { + if (chunk.GetType() != CHUNK_DEFLATE && !find_normal) { + continue; + } + + if (chunk.GetEntryName() == name) { return &chunk; } + + // Edge case when target chunk is split due to size limit but source chunk isn't. + if (name == (chunk.GetEntryName() + "-0") || chunk.GetEntryName() == (name + "-0")) { + return &chunk; + } + + // TODO handle the .so files with incremental version number. + // (e.g. lib/arm64-v8a/libcronet.59.0.3050.4.so) } + return nullptr; } @@ -738,24 +847,214 @@ bool ZipModeImage::CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* // For zips, we only need merge normal chunks for the target: deflated chunks are matched via // filename, and normal chunks are patched using the entire source file as the source. - tgt_image->MergeAdjacentNormalChunks(); - tgt_image->DumpChunks(); + if (tgt_image->limit_ == 0) { + tgt_image->MergeAdjacentNormalChunks(); + tgt_image->DumpChunks(); + } return true; } -bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, - const std::string& patch_name) { +// For each target chunk, look for the corresponding source chunk by the zip_entry name. If +// found, add the range of this chunk in the original source file to the block aligned source +// ranges. Construct the split src & tgt image once the size of source range reaches limit. +bool ZipModeImage::SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector* split_tgt_images, + std::vector* split_src_images, + std::vector* split_src_ranges) { + CHECK_EQ(tgt_image.limit_, src_image.limit_); + size_t limit = tgt_image.limit_; + + src_image.DumpChunks(); + printf("Splitting %zu tgt chunks...\n", tgt_image.NumOfChunks()); + + SortedRangeSet used_src_ranges; // ranges used for previous split source images. + + // Reserve the central directory in advance for the last split image. 
+ const auto& central_directory = src_image.cend() - 1; + CHECK_EQ(CHUNK_NORMAL, central_directory->GetType()); + used_src_ranges.Insert(central_directory->GetStartOffset(), + central_directory->DataLengthForPatch()); + + SortedRangeSet src_ranges; + std::vector split_src_chunks; + std::vector split_tgt_chunks; + for (auto tgt = tgt_image.cbegin(); tgt != tgt_image.cend(); tgt++) { + const ImageChunk* src = src_image.FindChunkByName(tgt->GetEntryName(), true); + if (src == nullptr) { + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + continue; + } + + size_t src_offset = src->GetStartOffset(); + size_t src_length = src->GetRawDataLength(); + + CHECK(src_length > 0); + CHECK_LE(src_length, limit); + + // Make sure this source range hasn't been used before so that the src_range pieces don't + // overlap with each other. + if (!RemoveUsedBlocks(&src_offset, &src_length, used_src_ranges)) { + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + } else if (src_ranges.blocks() * BLOCK_SIZE + src_length <= limit) { + src_ranges.Insert(src_offset, src_length); + + // Add the deflate source chunk if it hasn't been aligned. + if (src->GetType() == CHUNK_DEFLATE && src_length == src->GetRawDataLength()) { + split_src_chunks.push_back(*src); + split_tgt_chunks.push_back(*tgt); + } else { + // TODO split smarter to avoid alignment of large deflate chunks + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + } + } else { + ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, split_tgt_chunks, + split_src_chunks, split_tgt_images, + split_src_images); + + split_tgt_chunks.clear(); + split_src_chunks.clear(); + used_src_ranges.Insert(src_ranges); + split_src_ranges->push_back(std::move(src_ranges)); + src_ranges.Clear(); + + // We don't have enough space for the current chunk; start a new split image and handle + // this chunk there. + tgt--; + } + } + + // TODO Trim it in case the CD exceeds limit too much. + src_ranges.Insert(central_directory->GetStartOffset(), central_directory->DataLengthForPatch()); + ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, split_tgt_chunks, + split_src_chunks, split_tgt_images, split_src_images); + split_src_ranges->push_back(std::move(src_ranges)); + + ValidateSplitImages(*split_tgt_images, *split_src_images, *split_src_ranges, + tgt_image.file_content_.size()); + + return true; +} + +void ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + const SortedRangeSet& split_src_ranges, + const std::vector& split_tgt_chunks, + const std::vector& split_src_chunks, + std::vector* split_tgt_images, + std::vector* split_src_images) { + CHECK(!split_tgt_chunks.empty()); + // Target chunks should occupy at least one block. + // TODO put a warning and change the type to raw if it happens in extremely rare cases. + size_t tgt_size = split_tgt_chunks.back().GetStartOffset() + + split_tgt_chunks.back().DataLengthForPatch() - + split_tgt_chunks.front().GetStartOffset(); + CHECK_GE(tgt_size, BLOCK_SIZE); + + std::vector aligned_tgt_chunks; + + // Align the target chunks in the beginning with BLOCK_SIZE. 
+ size_t i = 0; + while (i < split_tgt_chunks.size()) { + size_t tgt_start = split_tgt_chunks[i].GetStartOffset(); + size_t tgt_length = split_tgt_chunks[i].GetRawDataLength(); + + // Current ImageChunk is long enough to align. + if (AlignHead(&tgt_start, &tgt_length)) { + aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt_start, &tgt_image.file_content_, + tgt_length); + break; + } + + i++; + } + CHECK_LT(i, split_tgt_chunks.size()); + aligned_tgt_chunks.insert(aligned_tgt_chunks.end(), split_tgt_chunks.begin() + i + 1, + split_tgt_chunks.end()); + CHECK(!aligned_tgt_chunks.empty()); + + // Add a normal chunk to align the contents in the end. + size_t end_offset = + aligned_tgt_chunks.back().GetStartOffset() + aligned_tgt_chunks.back().GetRawDataLength(); + if (end_offset % BLOCK_SIZE != 0 && end_offset < tgt_image.file_content_.size()) { + aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, end_offset, &tgt_image.file_content_, + BLOCK_SIZE - (end_offset % BLOCK_SIZE)); + } + + ZipModeImage split_tgt_image(false); + split_tgt_image.Initialize(std::move(aligned_tgt_chunks), {}); + split_tgt_image.MergeAdjacentNormalChunks(); + + // Construct the dummy source file based on the src_ranges. + std::vector src_content; + for (const auto& r : split_src_ranges) { + size_t end = std::min(src_image.file_content_.size(), r.second * BLOCK_SIZE); + src_content.insert(src_content.end(), src_image.file_content_.begin() + r.first * BLOCK_SIZE, + src_image.file_content_.begin() + end); + } + + // We should not have an empty src in our design; otherwise we will encounter an error in + // bsdiff since src_content.data() == nullptr. + CHECK(!src_content.empty()); + + ZipModeImage split_src_image(true); + split_src_image.Initialize(split_src_chunks, std::move(src_content)); + + split_tgt_images->push_back(std::move(split_tgt_image)); + split_src_images->push_back(std::move(split_src_image)); +} + +void ZipModeImage::ValidateSplitImages(const std::vector& split_tgt_images, + const std::vector& split_src_images, + std::vector& split_src_ranges, + size_t total_tgt_size) { + CHECK_EQ(split_tgt_images.size(), split_src_images.size()); + + printf("Validating %zu images\n", split_tgt_images.size()); + + // Verify that the target image pieces is continuous and can add up to the total size. + size_t last_offset = 0; + for (const auto& tgt_image : split_tgt_images) { + CHECK(!tgt_image.chunks_.empty()); + + CHECK_EQ(last_offset, tgt_image.chunks_.front().GetStartOffset()); + CHECK(last_offset % BLOCK_SIZE == 0); + + // Check the target chunks within the split image are continuous. + for (const auto& chunk : tgt_image.chunks_) { + CHECK_EQ(last_offset, chunk.GetStartOffset()); + last_offset += chunk.GetRawDataLength(); + } + } + CHECK_EQ(total_tgt_size, last_offset); + + // Verify that the source ranges are mutually exclusive. 
+ CHECK_EQ(split_src_images.size(), split_src_ranges.size()); + SortedRangeSet used_src_ranges; + for (size_t i = 0; i < split_src_ranges.size(); i++) { + CHECK(!used_src_ranges.Overlaps(split_src_ranges[i])) + << "src range " << split_src_ranges[i].ToString() << " overlaps " + << used_src_ranges.ToString(); + used_src_ranges.Insert(split_src_ranges[i]); + } +} + +bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector* patch_chunks) { printf("Construct patches for %zu chunks...\n", tgt_image.NumOfChunks()); - std::vector patch_chunks; - patch_chunks.reserve(tgt_image.NumOfChunks()); + patch_chunks->clear(); saidx_t* bsdiff_cache = nullptr; for (size_t i = 0; i < tgt_image.NumOfChunks(); i++) { const auto& tgt_chunk = tgt_image[i]; if (PatchChunk::RawDataIsSmaller(tgt_chunk, 0)) { - patch_chunks.emplace_back(tgt_chunk); + patch_chunks->emplace_back(tgt_chunk); continue; } @@ -776,13 +1075,23 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI tgt_chunk.GetRawDataLength()); if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { - patch_chunks.emplace_back(tgt_chunk); + patch_chunks->emplace_back(tgt_chunk); } else { - patch_chunks.emplace_back(tgt_chunk, src_ref, std::move(patch_data)); + patch_chunks->emplace_back(tgt_chunk, src_ref, std::move(patch_data)); } } free(bsdiff_cache); + CHECK_EQ(patch_chunks->size(), tgt_image.NumOfChunks()); + return true; +} + +bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + const std::string& patch_name) { + std::vector patch_chunks; + + ZipModeImage::GeneratePatchesInternal(tgt_image, src_image, &patch_chunks); + CHECK_EQ(tgt_image.NumOfChunks(), patch_chunks.size()); android::base::unique_fd patch_fd( @@ -795,6 +1104,66 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI return PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd); } +bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_images, + const std::vector& split_src_images, + const std::vector& split_src_ranges, + const std::string& patch_name, const std::string& debug_dir) { + printf("Construct patches for %zu split images...\n", split_tgt_images.size()); + + android::base::unique_fd patch_fd( + open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + if (patch_fd == -1) { + printf("failed to open \"%s\": %s\n", patch_name.c_str(), strerror(errno)); + return false; + } + + for (size_t i = 0; i < split_tgt_images.size(); i++) { + std::vector patch_chunks; + if (!ZipModeImage::GeneratePatchesInternal(split_tgt_images[i], split_src_images[i], + &patch_chunks)) { + printf("failed to generate split patch\n"); + return false; + } + + for (auto& p : patch_chunks) { + p.UpdateSourceOffset(split_src_ranges[i]); + } + + if (!PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd)) { + return false; + } + + // Write the split source & patch into the debug directory. 
+ if (!debug_dir.empty()) { + std::string src_name = android::base::StringPrintf("%s/src-%zu", debug_dir.c_str(), i); + android::base::unique_fd fd( + open(src_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + + if (fd == -1) { + printf("Failed to open %s\n", src_name.c_str()); + return false; + } + if (!android::base::WriteFully(fd, split_src_images[i].PseudoSource().DataForPatch(), + split_src_images[i].PseudoSource().DataLengthForPatch())) { + printf("Failed to write split source data into %s\n", src_name.c_str()); + return false; + } + + std::string patch_name = android::base::StringPrintf("%s/patch-%zu", debug_dir.c_str(), i); + fd.reset(open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + + if (fd == -1) { + printf("Failed to open %s\n", patch_name.c_str()); + return false; + } + if (!PatchChunk::WritePatchDataToFd(patch_chunks, fd)) { + return false; + } + } + } + return true; +} + bool ImageModeImage::Initialize(const std::string& filename) { if (!ReadFile(filename, &file_content_)) { return false; @@ -1026,11 +1395,14 @@ bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, int imgdiff(int argc, const char** argv) { bool zip_mode = false; std::vector bonus_data; + size_t blocks_limit = 0; + std::string debug_dir; int opt; + int option_index; optind = 1; // Reset the getopt state so that we can call it multiple times for test. - while ((opt = getopt(argc, const_cast(argv), "zb:")) != -1) { + while ((opt = getopt_long(argc, const_cast(argv), "zb:", OPTIONS, &option_index)) != -1) { switch (opt) { case 'z': zip_mode = true; @@ -1055,6 +1427,16 @@ int imgdiff(int argc, const char** argv) { } break; } + case 0: { + std::string name = OPTIONS[option_index].name; + if (name == "block-limit" && !android::base::ParseUint(optarg, &blocks_limit)) { + printf("failed to parse size blocks_limit: %s\n", optarg); + return 1; + } else if (name == "debug-dir") { + debug_dir = optarg; + } + break; + } default: printf("unexpected opt: %s\n", optarg); return 2; @@ -1062,13 +1444,20 @@ int imgdiff(int argc, const char** argv) { } if (argc - optind != 3) { - printf("usage: %s [-z] [-b ] \n", argv[0]); + printf("usage: %s [options] \n", argv[0]); + printf( + " -z , Generate patches in zip mode, src and tgt should be zip files.\n" + " -b , Bonus file in addition to src, image mode only.\n" + " --block-limit, For large zips, split the src and tgt based on the block limit;\n" + " and generate patches between each pair of pieces. Concatenate these\n" + " patches together and output them into .\n" + " --debug_dir, Debug directory to put the split srcs and patches, zip mode only.\n"); return 2; } if (zip_mode) { - ZipModeImage src_image(true); - ZipModeImage tgt_image(false); + ZipModeImage src_image(true, blocks_limit * BLOCK_SIZE); + ZipModeImage tgt_image(false, blocks_limit * BLOCK_SIZE); if (!src_image.Initialize(argv[optind])) { return 1; @@ -1080,9 +1469,25 @@ int imgdiff(int argc, const char** argv) { if (!ZipModeImage::CheckAndProcessChunks(&tgt_image, &src_image)) { return 1; } + + // TODO save and output the split information so that caller can create split transfer lists + // accordingly. + // Compute bsdiff patches for each chunk's data (the uncompressed data, in the case of // deflate chunks). 
- if (!ZipModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { + if (blocks_limit > 0) { + std::vector split_tgt_images; + std::vector split_src_images; + std::vector split_src_ranges; + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + if (!ZipModeImage::GeneratePatches(split_tgt_images, split_src_images, split_src_ranges, + argv[optind + 2], debug_dir)) { + return 1; + } + + } else if (!ZipModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { return 1; } } else { diff --git a/applypatch/include/applypatch/imgdiff_image.h b/applypatch/include/applypatch/imgdiff_image.h index 221dd5ab5..9fb844b24 100644 --- a/applypatch/include/applypatch/imgdiff_image.h +++ b/applypatch/include/applypatch/imgdiff_image.h @@ -129,6 +129,9 @@ class PatchChunk { // Return true if raw data size is smaller than the patch size. static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size); + // Update the source start with the new offset within the source range. + void UpdateSourceOffset(const SortedRangeSet& src_range); + static bool WritePatchDataToFd(const std::vector& patch_chunks, int patch_fd); private: @@ -177,6 +180,14 @@ class Image { return chunks_.end(); } + std::vector::const_iterator cbegin() const { + return chunks_.cbegin(); + } + + std::vector::const_iterator cend() const { + return chunks_.cend(); + } + ImageChunk& operator[](size_t i); const ImageChunk& operator[](size_t i) const; @@ -194,10 +205,18 @@ class Image { class ZipModeImage : public Image { public: - explicit ZipModeImage(bool is_source) : Image(is_source) {} + explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {} bool Initialize(const std::string& filename) override; + // Initialize a dummy ZipModeImage from an existing ImageChunk vector. For src img pieces, we + // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img + // pieces; because for each chunk both the data and their offset within the file are unchanged. + void Initialize(const std::vector& chunks, const std::vector& file_content) { + chunks_ = chunks; + file_content_ = file_content; + } + // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in // fact the whole source file. ImageChunk PseudoSource() const; @@ -216,6 +235,21 @@ class ZipModeImage : public Image { static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, const std::string& patch_name); + // Compute the patch based on the lists of split src and tgt images. Generate patches for each + // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write + // each split src data and patch data into that directory. + static bool GeneratePatches(const std::vector& split_tgt_images, + const std::vector& split_src_images, + const std::vector& split_src_ranges, + const std::string& patch_name, const std::string& debug_dir); + + // Split the tgt chunks and src chunks based on the size limit. + static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector* split_tgt_images, + std::vector* split_src_images, + std::vector* split_src_ranges); + private: // Initialize image chunks based on the zip entries. 
bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle); @@ -223,6 +257,28 @@ class ZipModeImage : public Image { bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry); // Return the real size of the zip file. (omit the trailing zeros that used for alignment) bool GetZipFileSize(size_t* input_file_size); + + static void ValidateSplitImages(const std::vector& split_tgt_images, + const std::vector& split_src_images, + std::vector& split_src_ranges, + size_t total_tgt_size); + // Construct the dummy split images based on the chunks info and source ranges; and move them into + // the given vectors. + static void AddSplitImageFromChunkList(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + const SortedRangeSet& split_src_ranges, + const std::vector& split_tgt_chunks, + const std::vector& split_src_chunks, + std::vector* split_tgt_images, + std::vector* split_src_images); + + // Function that actually iterates the tgt_chunks and makes patches. + static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + std::vector* patch_chunks); + + // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit, + // we'll split that entry into several smaller chunks in advance. + size_t limit_; }; class ImageModeImage : public Image { diff --git a/tests/component/imgdiff_test.cpp b/tests/component/imgdiff_test.cpp index bf25aebb0..3163a57cf 100644 --- a/tests/component/imgdiff_test.cpp +++ b/tests/component/imgdiff_test.cpp @@ -16,13 +16,17 @@ #include +#include #include +#include #include #include #include +#include #include #include +#include #include #include #include @@ -75,15 +79,20 @@ static void verify_patch_header(const std::string& patch, size_t* num_normal, si if (num_deflate != nullptr) *num_deflate = deflate; } -static void verify_patched_image(const std::string& src, const std::string& patch, - const std::string& tgt) { - std::string patched; +static void GenerateTarget(const std::string& src, const std::string& patch, std::string* patched) { + patched->clear(); ASSERT_EQ(0, ApplyImagePatch(reinterpret_cast(src.data()), src.size(), reinterpret_cast(patch.data()), patch.size(), - [&patched](const unsigned char* data, size_t len) { - patched.append(reinterpret_cast(data), len); + [&](const unsigned char* data, size_t len) { + patched->append(reinterpret_cast(data), len); return len; })); +} + +static void verify_patched_image(const std::string& src, const std::string& patch, + const std::string& tgt) { + std::string patched; + GenerateTarget(src, patch, &patched); ASSERT_EQ(tgt, patched); } @@ -623,3 +632,323 @@ TEST(ImgpatchTest, image_mode_patch_corruption) { reinterpret_cast(patch.data()), patch.size(), [](const unsigned char* /*data*/, size_t len) { return len; })); } + +static void construct_store_entry(const std::vector>& info, + ZipWriter* writer) { + for (auto& t : info) { + // Create t(1) blocks of t(2), and write the data to t(0) + ASSERT_EQ(0, writer->StartEntry(std::get<0>(t).c_str(), 0)); + const std::string content(std::get<1>(t) * 4096, std::get<2>(t)); + ASSERT_EQ(0, writer->WriteBytes(content.data(), content.size())); + ASSERT_EQ(0, writer->FinishEntry()); + } +} + +static void construct_deflate_entry(const std::vector>& info, + ZipWriter* writer, const std::string& data) { + for (auto& t : info) { + // t(0): entry_name; t(1): block offset; t(2) length in blocks. 
+ ASSERT_EQ(0, writer->StartEntry(std::get<0>(t).c_str(), ZipWriter::kCompress)); + ASSERT_EQ(0, writer->WriteBytes(data.data() + std::get<1>(t) * 4096, std::get<2>(t) * 4096)); + ASSERT_EQ(0, writer->FinishEntry()); + } +} + +// Look for the generated source and patch pieces in the debug_dir and generate the target on +// each pair. Concatenate the split target and match against the orignal one. +static void GenerateAndCheckSplitTarget(const std::string& debug_dir, size_t count, + const std::string& tgt) { + std::string patched; + for (size_t i = 0; i < count; i++) { + std::string split_src_path = android::base::StringPrintf("%s/src-%zu", debug_dir.c_str(), i); + std::string split_patch_path = android::base::StringPrintf("%s/patch-%zu", debug_dir.c_str(), i); + + std::string split_src; + std::string split_patch; + ASSERT_TRUE(android::base::ReadFileToString(split_src_path, &split_src)); + ASSERT_TRUE(android::base::ReadFileToString(split_patch_path, &split_patch)); + + std::string split_tgt; + GenerateTarget(split_src, split_patch, &split_tgt); + patched += split_tgt; + } + + // Verify we can get back the original target image. + ASSERT_EQ(tgt, patched); +} + +std::vector ConstructImageChunks( + const std::vector& content, const std::vector>& info) { + std::vector chunks; + size_t start = 0; + for (const auto& t : info) { + size_t length = std::get<1>(t); + chunks.emplace_back(CHUNK_NORMAL, start, &content, length, std::get<0>(t)); + start += length; + } + + return chunks; +} + +TEST(ImgdiffTest, zip_mode_split_image_smoke) { + std::vector content; + content.reserve(4096 * 50); + uint8_t n = 0; + generate_n(back_inserter(content), 4096 * 50, [&n]() { return n++ / 4096; }); + + ZipModeImage tgt_image(false, 4096 * 10); + std::vector tgt_chunks = ConstructImageChunks(content, { { "a", 100 }, + { "b", 4096 * 2 }, + { "c", 4096 * 3 }, + { "d", 300 }, + { "e-0", 4096 * 10 }, + { "e-1", 4096 * 5 }, + { "CD", 200 } }); + tgt_image.Initialize(std::move(tgt_chunks), + std::vector(content.begin(), content.begin() + 82520)); + + tgt_image.DumpChunks(); + + ZipModeImage src_image(true, 4096 * 10); + std::vector src_chunks = ConstructImageChunks(content, { { "b", 4096 * 3 }, + { "c-0", 4096 * 10 }, + { "c-1", 4096 * 2 }, + { "a", 4096 * 5 }, + { "e-0", 4096 * 10 }, + { "e-1", 10000 }, + { "CD", 5000 } }); + src_image.Initialize(std::move(src_chunks), + std::vector(content.begin(), content.begin() + 137880)); + + std::vector split_tgt_images; + std::vector split_src_images; + std::vector split_src_ranges; + + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + // src_piece 1: a 5 blocks, b 3 blocks + // src_piece 2: c-0 10 blocks + // src_piece 3: d 0 block, e-0 10 blocks + // src_piece 4: e-1 2 blocks; CD 2 blocks + ASSERT_EQ(split_tgt_images.size(), split_src_images.size()); + ASSERT_EQ(static_cast(4), split_tgt_images.size()); + + ASSERT_EQ(static_cast(1), split_tgt_images[0].NumOfChunks()); + ASSERT_EQ(static_cast(12288), split_tgt_images[0][0].DataLengthForPatch()); + ASSERT_EQ("4,0,3,15,20", split_src_ranges[0].ToString()); + + ASSERT_EQ(static_cast(1), split_tgt_images[1].NumOfChunks()); + ASSERT_EQ(static_cast(12288), split_tgt_images[1][0].DataLengthForPatch()); + ASSERT_EQ("2,3,13", split_src_ranges[1].ToString()); + + ASSERT_EQ(static_cast(1), split_tgt_images[2].NumOfChunks()); + ASSERT_EQ(static_cast(40960), split_tgt_images[2][0].DataLengthForPatch()); + ASSERT_EQ("2,20,30", split_src_ranges[2].ToString()); + + 
ASSERT_EQ(static_cast(1), split_tgt_images[3].NumOfChunks()); + ASSERT_EQ(static_cast(16984), split_tgt_images[3][0].DataLengthForPatch()); + ASSERT_EQ("2,30,34", split_src_ranges[3].ToString()); +} + +TEST(ImgdiffTest, zip_mode_store_large_apk) { + // Construct src and tgt zip files with limit = 10 blocks. + // src tgt + // 12 blocks 'd' 3 blocks 'a' + // 8 blocks 'c' 3 blocks 'b' + // 3 blocks 'b' 8 blocks 'c' (exceeds limit) + // 3 blocks 'a' 12 blocks 'd' (exceeds limit) + // 3 blocks 'e' + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + construct_store_entry( + { { "a", 3, 'a' }, { "b", 3, 'b' }, { "c", 8, 'c' }, { "d", 12, 'd' }, { "e", 3, 'e' } }, + &tgt_writer); + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + construct_store_entry({ { "d", 12, 'd' }, { "c", 8, 'c' }, { "b", 3, 'b' }, { "a", 3, 'a' } }, + &src_writer); + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + // Compute patch. + TemporaryFile patch_file; + TemporaryDir debug_dir; + std::vector args = { + "imgdiff", "-z", "--block-limit=10", android::base::StringPrintf( + "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path, + }; + ASSERT_EQ(0, imgdiff(args.size(), args.data())); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 4 pieces of patch.(Rougly 3'a',3'b'; 8'c'; 10'd'; 2'd'3'e') + GenerateAndCheckSplitTarget(debug_dir.path, 4, tgt); +} + +TEST(ImgdiffTest, zip_mode_deflate_large_apk) { + // Generate 50 blocks of random data. + std::string random_data; + random_data.reserve(4096 * 50); + generate_n(back_inserter(random_data), 4096 * 50, []() { return rand() % 256; }); + + // Construct src and tgt zip files with limit = 10 blocks. 
+ // src tgt + // 22 blocks, "d" 4 blocks, "a" + // 5 blocks, "b" 4 blocks, "b" + // 3 blocks, "a" 7 blocks, "c" (exceeds limit) + // 8 blocks, "c" 20 blocks, "d" (exceeds limit) + // 1 block, "f" 2 blocks, "e" + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + + construct_deflate_entry( + { { "a", 0, 4 }, { "b", 5, 4 }, { "c", 12, 8 }, { "d", 21, 20 }, { "e", 45, 2 }, + { "f", 48, 1 } }, &tgt_writer, random_data); + + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + + construct_deflate_entry( + { { "d", 21, 22 }, { "b", 5, 5 }, { "a", 0, 3 }, { "g", 9, 1 }, { "c", 11, 8 }, + { "f", 45, 1 } }, &src_writer, random_data); + + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + ZipModeImage src_image(true, 10 * 4096); + ZipModeImage tgt_image(false, 10 * 4096); + ASSERT_TRUE(src_image.Initialize(src_file.path)); + ASSERT_TRUE(tgt_image.Initialize(tgt_file.path)); + ASSERT_TRUE(ZipModeImage::CheckAndProcessChunks(&tgt_image, &src_image)); + + src_image.DumpChunks(); + tgt_image.DumpChunks(); + + std::vector split_tgt_images; + std::vector split_src_images; + std::vector split_src_ranges; + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + // src_piece 1: a 3 blocks, b 5 blocks + // src_piece 2: c 8 blocks + // src_piece 3: d-0 10 block + // src_piece 4: d-1 10 blocks + // src_piece 5: e 1 block, CD + ASSERT_EQ(split_tgt_images.size(), split_src_images.size()); + ASSERT_EQ(static_cast(5), split_tgt_images.size()); + + ASSERT_EQ(static_cast(2), split_src_images[0].NumOfChunks()); + ASSERT_EQ("a", split_src_images[0][0].GetEntryName()); + ASSERT_EQ("b", split_src_images[0][1].GetEntryName()); + + ASSERT_EQ(static_cast(1), split_src_images[1].NumOfChunks()); + ASSERT_EQ("c", split_src_images[1][0].GetEntryName()); + + ASSERT_EQ(static_cast(0), split_src_images[2].NumOfChunks()); + ASSERT_EQ(static_cast(0), split_src_images[3].NumOfChunks()); + ASSERT_EQ(static_cast(0), split_src_images[4].NumOfChunks()); + + // Compute patch. + TemporaryFile patch_file; + TemporaryDir debug_dir; + ASSERT_TRUE(ZipModeImage::GeneratePatches(split_tgt_images, split_src_images, split_src_ranges, + patch_file.path, debug_dir.path)); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 5 pieces of patch. ["a","b"; "c"; "d-0"; "d-1"; "e"] + GenerateAndCheckSplitTarget(debug_dir.path, 5, tgt); +} + +TEST(ImgdiffTest, zip_mode_no_match_source) { + // Generate 20 blocks of random data. + std::string random_data; + random_data.reserve(4096 * 20); + generate_n(back_inserter(random_data), 4096 * 20, []() { return rand() % 256; }); + + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + + construct_deflate_entry({ { "a", 0, 4 }, { "b", 5, 5 }, { "c", 11, 5 } }, &tgt_writer, + random_data); + + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + // We don't have a matching source entry. + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + construct_store_entry({ { "d", 1, 'd' } }, &src_writer); + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + // Compute patch. 
+  TemporaryFile patch_file;
+  TemporaryDir debug_dir;
+  std::vector<const char*> args = {
+    "imgdiff", "-z", "--block-limit=10", android::base::StringPrintf(
+    "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path,
+  };
+  ASSERT_EQ(0, imgdiff(args.size(), args.data()));
+
+  std::string tgt;
+  ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt));
+
+  // Expect 1 piece of patch due to no matching source entry.
+  GenerateAndCheckSplitTarget(debug_dir.path, 1, tgt);
+}
+
+TEST(ImgdiffTest, zip_mode_large_enough_limit) {
+  // Generate 20 blocks of random data.
+  std::string random_data;
+  random_data.reserve(4096 * 20);
+  generate_n(back_inserter(random_data), 4096 * 20, []() { return rand() % 256; });
+
+  TemporaryFile tgt_file;
+  FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb");
+  ZipWriter tgt_writer(tgt_file_ptr);
+
+  construct_deflate_entry({ { "a", 0, 10 }, { "b", 10, 5 } }, &tgt_writer, random_data);
+
+  ASSERT_EQ(0, tgt_writer.Finish());
+  ASSERT_EQ(0, fclose(tgt_file_ptr));
+
+  // Construct 10 blocks of source.
+  TemporaryFile src_file;
+  FILE* src_file_ptr = fdopen(src_file.fd, "wb");
+  ZipWriter src_writer(src_file_ptr);
+  construct_deflate_entry({ { "a", 1, 10 } }, &src_writer, random_data);
+  ASSERT_EQ(0, src_writer.Finish());
+  ASSERT_EQ(0, fclose(src_file_ptr));
+
+  // Compute patch with a limit of 20 blocks.
+  TemporaryFile patch_file;
+  TemporaryDir debug_dir;
+  std::vector<const char*> args = {
+    "imgdiff", "-z", "--block-limit=20", android::base::StringPrintf(
+    "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path,
+  };
+  ASSERT_EQ(0, imgdiff(args.size(), args.data()));
+
+  std::string tgt;
+  ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt));
+
+  // Expect 1 piece of patch since the limit is larger than the zip file size.
+  GenerateAndCheckSplitTarget(debug_dir.path, 1, tgt);
+}
--
cgit v1.2.3
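
The new --block-limit and --debug-dir options are exercised end to end by the component tests above, which build an argv vector and call imgdiff() directly. A minimal sketch of the same invocation, assuming the imgdiff() entry point is declared in applypatch/imgdiff.h (header path assumed) and using placeholder zip/patch paths:

    #include <vector>

    #include "applypatch/imgdiff.h"  // assumed to declare: int imgdiff(int argc, const char** argv);

    int main() {
      // Zip mode with a 10-block split limit; per-piece split sources and patches
      // are written into the debug directory, as in the tests above.
      std::vector<const char*> args = {
        "imgdiff", "-z", "--block-limit=10", "--debug-dir=/tmp/imgdiff-debug",
        "src.zip", "tgt.zip", "patch.out",  // placeholder paths
      };
      // Returns 0 on success; the output patch is the concatenation of the per-split patches.
      return imgdiff(args.size(), args.data());
    }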