diff options
author | Tianjie Xu <xunchang@google.com> | 2017-09-08 22:48:45 +0200 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2017-09-08 22:48:45 +0200 |
commit | b2dac7042315cceb978831432b1587d1a3cd90dc (patch) | |
tree | e3952dcaf7cb043a96d42a060a4b8a70085aa36d | |
parent | Merge "wear_ui: Remove PrintOnScreenOnly() and PrintV()." am: 8c753f6253 (diff) | |
parent | Merge "Improve imgdiff for large zip files" (diff) | |
download | android_bootable_recovery-b2dac7042315cceb978831432b1587d1a3cd90dc.tar android_bootable_recovery-b2dac7042315cceb978831432b1587d1a3cd90dc.tar.gz android_bootable_recovery-b2dac7042315cceb978831432b1587d1a3cd90dc.tar.bz2 android_bootable_recovery-b2dac7042315cceb978831432b1587d1a3cd90dc.tar.lz android_bootable_recovery-b2dac7042315cceb978831432b1587d1a3cd90dc.tar.xz android_bootable_recovery-b2dac7042315cceb978831432b1587d1a3cd90dc.tar.zst android_bootable_recovery-b2dac7042315cceb978831432b1587d1a3cd90dc.zip |
Diffstat (limited to '')
-rw-r--r-- | applypatch/imgdiff.cpp | 441 | ||||
-rw-r--r-- | applypatch/include/applypatch/imgdiff_image.h | 58 | ||||
-rw-r--r-- | tests/component/imgdiff_test.cpp | 339 |
3 files changed, 814 insertions, 24 deletions
diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index 59b600713..2eb618fbf 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -125,6 +125,7 @@ #include <errno.h> #include <fcntl.h> +#include <getopt.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -139,16 +140,19 @@ #include <android-base/file.h> #include <android-base/logging.h> #include <android-base/memory.h> +#include <android-base/parseint.h> #include <android-base/unique_fd.h> #include <bsdiff.h> #include <ziparchive/zip_archive.h> #include <zlib.h> #include "applypatch/imgdiff_image.h" +#include "rangeset.h" using android::base::get_unaligned; -static constexpr auto BUFFER_SIZE = 0x8000; +static constexpr size_t BLOCK_SIZE = 4096; +static constexpr size_t BUFFER_SIZE = 0x8000; // If we use this function to write the offset and length (type size_t), their values should not // exceed 2^63; because the signed bit will be casted away. @@ -162,6 +166,67 @@ static inline bool Write4(int fd, int32_t value) { return android::base::WriteFully(fd, &value, sizeof(int32_t)); } +// Trim the head or tail to align with the block size. Return false if the chunk has nothing left +// after alignment. +static bool AlignHead(size_t* start, size_t* length) { + size_t residual = (*start % BLOCK_SIZE == 0) ? 0 : BLOCK_SIZE - *start % BLOCK_SIZE; + + if (*length <= residual) { + *length = 0; + return false; + } + + // Trim the data in the beginning. + *start += residual; + *length -= residual; + return true; +} + +static bool AlignTail(size_t* start, size_t* length) { + size_t residual = (*start + *length) % BLOCK_SIZE; + if (*length <= residual) { + *length = 0; + return false; + } + + // Trim the data in the end. + *length -= residual; + return true; +} + +// Remove the used blocks from the source chunk to make sure the source ranges are mutually +// exclusive after split. Return false if we fail to get the non-overlapped ranges. In such +// a case, we'll skip the entire source chunk. +static bool RemoveUsedBlocks(size_t* start, size_t* length, const SortedRangeSet& used_ranges) { + if (!used_ranges.Overlaps(*start, *length)) { + return true; + } + + // TODO find the largest non-overlap chunk. + printf("Removing block %s from %zu - %zu\n", used_ranges.ToString().c_str(), *start, + *start + *length - 1); + + // If there's no duplicate entry name, we should only overlap in the head or tail block. Try to + // trim both blocks. Skip this source chunk in case it still overlaps with the used ranges. + if (AlignHead(start, length) && !used_ranges.Overlaps(*start, *length)) { + return true; + } + if (AlignTail(start, length) && !used_ranges.Overlaps(*start, *length)) { + return true; + } + + printf("Failed to remove the overlapped block ranges; skip the source\n"); + return false; +} + +static const struct option OPTIONS[] = { + { "zip-mode", no_argument, nullptr, 'z' }, + { "bonus-file", required_argument, nullptr, 'b' }, + { "block-limit", required_argument, nullptr, 0 }, + { "debug-dir", required_argument, nullptr, 0 }, + { nullptr, 0, nullptr, 0 }, +}; + ImageChunk::ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len, std::string entry_name) : type_(type), @@ -371,6 +436,12 @@ bool PatchChunk::RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size) { return (tgt.GetType() == CHUNK_NORMAL && (target_len <= 160 || target_len < patch_size)); } +void PatchChunk::UpdateSourceOffset(const SortedRangeSet& src_range) { + if (type_ == CHUNK_DEFLATE) { + source_start_ = src_range.GetOffsetInRangeSet(source_start_); + } +} + // Header size: // header_type 4 bytes // CHUNK_NORMAL 8*3 = 24 bytes @@ -572,7 +643,7 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl ZipString name; ZipEntry entry; while ((ret = Next(cookie, &entry, &name)) == 0) { - if (entry.method == kCompressDeflated) { + if (entry.method == kCompressDeflated || limit_ > 0) { std::string entry_name(name.name, name.name + name.name_length); temp_entries.emplace_back(entry_name, entry); } @@ -595,6 +666,17 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl return false; } } + + // Add the end of zip file (mainly central directory) as a normal chunk. + size_t entries_end = 0; + if (!temp_entries.empty()) { + entries_end = static_cast<size_t>(temp_entries.back().second.offset + + temp_entries.back().second.compressed_length); + } + CHECK_LT(entries_end, file_content_.size()); + chunks_.emplace_back(CHUNK_NORMAL, entries_end, &file_content_, + file_content_.size() - entries_end); + return true; } @@ -635,7 +717,21 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl bool ZipModeImage::AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry) { size_t compressed_len = entry->compressed_length; - if (entry->method == kCompressDeflated) { + if (compressed_len == 0) return true; + + // Split the entry into several normal chunks if it's too large. + if (limit_ > 0 && compressed_len > limit_) { + int count = 0; + while (compressed_len > 0) { + size_t length = std::min(limit_, compressed_len); + std::string name = entry_name + "-" + std::to_string(count); + chunks_.emplace_back(CHUNK_NORMAL, entry->offset + limit_ * count, &file_content_, length, + name); + + count++; + compressed_len -= length; + } + } else if (entry->method == kCompressDeflated) { size_t uncompressed_len = entry->uncompressed_length; std::vector<uint8_t> uncompressed_data(uncompressed_len); int ret = ExtractToMemory(handle, entry, uncompressed_data.data(), uncompressed_len); @@ -697,10 +793,23 @@ const ImageChunk* ZipModeImage::FindChunkByName(const std::string& name, bool fi return nullptr; } for (auto& chunk : chunks_) { - if ((chunk.GetType() == CHUNK_DEFLATE || find_normal) && chunk.GetEntryName() == name) { + if (chunk.GetType() != CHUNK_DEFLATE && !find_normal) { + continue; + } + + if (chunk.GetEntryName() == name) { return &chunk; } + + // Edge case when target chunk is split due to size limit but source chunk isn't. + if (name == (chunk.GetEntryName() + "-0") || chunk.GetEntryName() == (name + "-0")) { + return &chunk; + } + + // TODO handle the .so files with incremental version number. + // (e.g. lib/arm64-v8a/libcronet.59.0.3050.4.so) } + return nullptr; } @@ -738,24 +847,214 @@ bool ZipModeImage::CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* // For zips, we only need merge normal chunks for the target: deflated chunks are matched via // filename, and normal chunks are patched using the entire source file as the source. - tgt_image->MergeAdjacentNormalChunks(); - tgt_image->DumpChunks(); + if (tgt_image->limit_ == 0) { + tgt_image->MergeAdjacentNormalChunks(); + tgt_image->DumpChunks(); + } return true; } -bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, - const std::string& patch_name) { +// For each target chunk, look for the corresponding source chunk by the zip_entry name. If +// found, add the range of this chunk in the original source file to the block aligned source +// ranges. Construct the split src & tgt image once the size of source range reaches limit. +bool ZipModeImage::SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector<ZipModeImage>* split_tgt_images, + std::vector<ZipModeImage>* split_src_images, + std::vector<SortedRangeSet>* split_src_ranges) { + CHECK_EQ(tgt_image.limit_, src_image.limit_); + size_t limit = tgt_image.limit_; + + src_image.DumpChunks(); + printf("Splitting %zu tgt chunks...\n", tgt_image.NumOfChunks()); + + SortedRangeSet used_src_ranges; // ranges used for previous split source images. + + // Reserve the central directory in advance for the last split image. + const auto& central_directory = src_image.cend() - 1; + CHECK_EQ(CHUNK_NORMAL, central_directory->GetType()); + used_src_ranges.Insert(central_directory->GetStartOffset(), + central_directory->DataLengthForPatch()); + + SortedRangeSet src_ranges; + std::vector<ImageChunk> split_src_chunks; + std::vector<ImageChunk> split_tgt_chunks; + for (auto tgt = tgt_image.cbegin(); tgt != tgt_image.cend(); tgt++) { + const ImageChunk* src = src_image.FindChunkByName(tgt->GetEntryName(), true); + if (src == nullptr) { + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + continue; + } + + size_t src_offset = src->GetStartOffset(); + size_t src_length = src->GetRawDataLength(); + + CHECK(src_length > 0); + CHECK_LE(src_length, limit); + + // Make sure this source range hasn't been used before so that the src_range pieces don't + // overlap with each other. + if (!RemoveUsedBlocks(&src_offset, &src_length, used_src_ranges)) { + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + } else if (src_ranges.blocks() * BLOCK_SIZE + src_length <= limit) { + src_ranges.Insert(src_offset, src_length); + + // Add the deflate source chunk if it hasn't been aligned. + if (src->GetType() == CHUNK_DEFLATE && src_length == src->GetRawDataLength()) { + split_src_chunks.push_back(*src); + split_tgt_chunks.push_back(*tgt); + } else { + // TODO split smarter to avoid alignment of large deflate chunks + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + } + } else { + ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, split_tgt_chunks, + split_src_chunks, split_tgt_images, + split_src_images); + + split_tgt_chunks.clear(); + split_src_chunks.clear(); + used_src_ranges.Insert(src_ranges); + split_src_ranges->push_back(std::move(src_ranges)); + src_ranges.Clear(); + + // We don't have enough space for the current chunk; start a new split image and handle + // this chunk there. + tgt--; + } + } + + // TODO Trim it in case the CD exceeds limit too much. + src_ranges.Insert(central_directory->GetStartOffset(), central_directory->DataLengthForPatch()); + ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, split_tgt_chunks, + split_src_chunks, split_tgt_images, split_src_images); + split_src_ranges->push_back(std::move(src_ranges)); + + ValidateSplitImages(*split_tgt_images, *split_src_images, *split_src_ranges, + tgt_image.file_content_.size()); + + return true; +} + +void ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + const SortedRangeSet& split_src_ranges, + const std::vector<ImageChunk>& split_tgt_chunks, + const std::vector<ImageChunk>& split_src_chunks, + std::vector<ZipModeImage>* split_tgt_images, + std::vector<ZipModeImage>* split_src_images) { + CHECK(!split_tgt_chunks.empty()); + // Target chunks should occupy at least one block. + // TODO put a warning and change the type to raw if it happens in extremely rare cases. + size_t tgt_size = split_tgt_chunks.back().GetStartOffset() + + split_tgt_chunks.back().DataLengthForPatch() - + split_tgt_chunks.front().GetStartOffset(); + CHECK_GE(tgt_size, BLOCK_SIZE); + + std::vector<ImageChunk> aligned_tgt_chunks; + + // Align the target chunks in the beginning with BLOCK_SIZE. + size_t i = 0; + while (i < split_tgt_chunks.size()) { + size_t tgt_start = split_tgt_chunks[i].GetStartOffset(); + size_t tgt_length = split_tgt_chunks[i].GetRawDataLength(); + + // Current ImageChunk is long enough to align. + if (AlignHead(&tgt_start, &tgt_length)) { + aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt_start, &tgt_image.file_content_, + tgt_length); + break; + } + + i++; + } + CHECK_LT(i, split_tgt_chunks.size()); + aligned_tgt_chunks.insert(aligned_tgt_chunks.end(), split_tgt_chunks.begin() + i + 1, + split_tgt_chunks.end()); + CHECK(!aligned_tgt_chunks.empty()); + + // Add a normal chunk to align the contents in the end. + size_t end_offset = + aligned_tgt_chunks.back().GetStartOffset() + aligned_tgt_chunks.back().GetRawDataLength(); + if (end_offset % BLOCK_SIZE != 0 && end_offset < tgt_image.file_content_.size()) { + aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, end_offset, &tgt_image.file_content_, + BLOCK_SIZE - (end_offset % BLOCK_SIZE)); + } + + ZipModeImage split_tgt_image(false); + split_tgt_image.Initialize(std::move(aligned_tgt_chunks), {}); + split_tgt_image.MergeAdjacentNormalChunks(); + + // Construct the dummy source file based on the src_ranges. + std::vector<uint8_t> src_content; + for (const auto& r : split_src_ranges) { + size_t end = std::min(src_image.file_content_.size(), r.second * BLOCK_SIZE); + src_content.insert(src_content.end(), src_image.file_content_.begin() + r.first * BLOCK_SIZE, + src_image.file_content_.begin() + end); + } + + // We should not have an empty src in our design; otherwise we will encounter an error in + // bsdiff since src_content.data() == nullptr. + CHECK(!src_content.empty()); + + ZipModeImage split_src_image(true); + split_src_image.Initialize(split_src_chunks, std::move(src_content)); + + split_tgt_images->push_back(std::move(split_tgt_image)); + split_src_images->push_back(std::move(split_src_image)); +} + +void ZipModeImage::ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images, + const std::vector<ZipModeImage>& split_src_images, + std::vector<SortedRangeSet>& split_src_ranges, + size_t total_tgt_size) { + CHECK_EQ(split_tgt_images.size(), split_src_images.size()); + + printf("Validating %zu images\n", split_tgt_images.size()); + + // Verify that the target image pieces is continuous and can add up to the total size. + size_t last_offset = 0; + for (const auto& tgt_image : split_tgt_images) { + CHECK(!tgt_image.chunks_.empty()); + + CHECK_EQ(last_offset, tgt_image.chunks_.front().GetStartOffset()); + CHECK(last_offset % BLOCK_SIZE == 0); + + // Check the target chunks within the split image are continuous. + for (const auto& chunk : tgt_image.chunks_) { + CHECK_EQ(last_offset, chunk.GetStartOffset()); + last_offset += chunk.GetRawDataLength(); + } + } + CHECK_EQ(total_tgt_size, last_offset); + + // Verify that the source ranges are mutually exclusive. + CHECK_EQ(split_src_images.size(), split_src_ranges.size()); + SortedRangeSet used_src_ranges; + for (size_t i = 0; i < split_src_ranges.size(); i++) { + CHECK(!used_src_ranges.Overlaps(split_src_ranges[i])) + << "src range " << split_src_ranges[i].ToString() << " overlaps " + << used_src_ranges.ToString(); + used_src_ranges.Insert(split_src_ranges[i]); + } +} + +bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector<PatchChunk>* patch_chunks) { printf("Construct patches for %zu chunks...\n", tgt_image.NumOfChunks()); - std::vector<PatchChunk> patch_chunks; - patch_chunks.reserve(tgt_image.NumOfChunks()); + patch_chunks->clear(); saidx_t* bsdiff_cache = nullptr; for (size_t i = 0; i < tgt_image.NumOfChunks(); i++) { const auto& tgt_chunk = tgt_image[i]; if (PatchChunk::RawDataIsSmaller(tgt_chunk, 0)) { - patch_chunks.emplace_back(tgt_chunk); + patch_chunks->emplace_back(tgt_chunk); continue; } @@ -776,13 +1075,23 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI tgt_chunk.GetRawDataLength()); if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { - patch_chunks.emplace_back(tgt_chunk); + patch_chunks->emplace_back(tgt_chunk); } else { - patch_chunks.emplace_back(tgt_chunk, src_ref, std::move(patch_data)); + patch_chunks->emplace_back(tgt_chunk, src_ref, std::move(patch_data)); } } free(bsdiff_cache); + CHECK_EQ(patch_chunks->size(), tgt_image.NumOfChunks()); + return true; +} + +bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + const std::string& patch_name) { + std::vector<PatchChunk> patch_chunks; + + ZipModeImage::GeneratePatchesInternal(tgt_image, src_image, &patch_chunks); + CHECK_EQ(tgt_image.NumOfChunks(), patch_chunks.size()); android::base::unique_fd patch_fd( @@ -795,6 +1104,66 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI return PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd); } +bool ZipModeImage::GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images, + const std::vector<ZipModeImage>& split_src_images, + const std::vector<SortedRangeSet>& split_src_ranges, + const std::string& patch_name, const std::string& debug_dir) { + printf("Construct patches for %zu split images...\n", split_tgt_images.size()); + + android::base::unique_fd patch_fd( + open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + if (patch_fd == -1) { + printf("failed to open \"%s\": %s\n", patch_name.c_str(), strerror(errno)); + return false; + } + + for (size_t i = 0; i < split_tgt_images.size(); i++) { + std::vector<PatchChunk> patch_chunks; + if (!ZipModeImage::GeneratePatchesInternal(split_tgt_images[i], split_src_images[i], + &patch_chunks)) { + printf("failed to generate split patch\n"); + return false; + } + + for (auto& p : patch_chunks) { + p.UpdateSourceOffset(split_src_ranges[i]); + } + + if (!PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd)) { + return false; + } + + // Write the split source & patch into the debug directory. + if (!debug_dir.empty()) { + std::string src_name = android::base::StringPrintf("%s/src-%zu", debug_dir.c_str(), i); + android::base::unique_fd fd( + open(src_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + + if (fd == -1) { + printf("Failed to open %s\n", src_name.c_str()); + return false; + } + if (!android::base::WriteFully(fd, split_src_images[i].PseudoSource().DataForPatch(), + split_src_images[i].PseudoSource().DataLengthForPatch())) { + printf("Failed to write split source data into %s\n", src_name.c_str()); + return false; + } + + std::string patch_name = android::base::StringPrintf("%s/patch-%zu", debug_dir.c_str(), i); + fd.reset(open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + + if (fd == -1) { + printf("Failed to open %s\n", patch_name.c_str()); + return false; + } + if (!PatchChunk::WritePatchDataToFd(patch_chunks, fd)) { + return false; + } + } + } + return true; +} + bool ImageModeImage::Initialize(const std::string& filename) { if (!ReadFile(filename, &file_content_)) { return false; @@ -1026,11 +1395,14 @@ bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, int imgdiff(int argc, const char** argv) { bool zip_mode = false; std::vector<uint8_t> bonus_data; + size_t blocks_limit = 0; + std::string debug_dir; int opt; + int option_index; optind = 1; // Reset the getopt state so that we can call it multiple times for test. - while ((opt = getopt(argc, const_cast<char**>(argv), "zb:")) != -1) { + while ((opt = getopt_long(argc, const_cast<char**>(argv), "zb:", OPTIONS, &option_index)) != -1) { switch (opt) { case 'z': zip_mode = true; @@ -1055,6 +1427,16 @@ int imgdiff(int argc, const char** argv) { } break; } + case 0: { + std::string name = OPTIONS[option_index].name; + if (name == "block-limit" && !android::base::ParseUint(optarg, &blocks_limit)) { + printf("failed to parse size blocks_limit: %s\n", optarg); + return 1; + } else if (name == "debug-dir") { + debug_dir = optarg; + } + break; + } default: printf("unexpected opt: %s\n", optarg); return 2; @@ -1062,13 +1444,20 @@ int imgdiff(int argc, const char** argv) { } if (argc - optind != 3) { - printf("usage: %s [-z] [-b <bonus-file>] <src-img> <tgt-img> <patch-file>\n", argv[0]); + printf("usage: %s [options] <src-img> <tgt-img> <patch-file>\n", argv[0]); + printf( + " -z <zip-mode>, Generate patches in zip mode, src and tgt should be zip files.\n" + " -b <bonus-file>, Bonus file in addition to src, image mode only.\n" + " --block-limit, For large zips, split the src and tgt based on the block limit;\n" + " and generate patches between each pair of pieces. Concatenate these\n" + " patches together and output them into <patch-file>.\n" + " --debug_dir, Debug directory to put the split srcs and patches, zip mode only.\n"); return 2; } if (zip_mode) { - ZipModeImage src_image(true); - ZipModeImage tgt_image(false); + ZipModeImage src_image(true, blocks_limit * BLOCK_SIZE); + ZipModeImage tgt_image(false, blocks_limit * BLOCK_SIZE); if (!src_image.Initialize(argv[optind])) { return 1; @@ -1080,9 +1469,25 @@ int imgdiff(int argc, const char** argv) { if (!ZipModeImage::CheckAndProcessChunks(&tgt_image, &src_image)) { return 1; } + + // TODO save and output the split information so that caller can create split transfer lists + // accordingly. + // Compute bsdiff patches for each chunk's data (the uncompressed data, in the case of // deflate chunks). - if (!ZipModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { + if (blocks_limit > 0) { + std::vector<ZipModeImage> split_tgt_images; + std::vector<ZipModeImage> split_src_images; + std::vector<SortedRangeSet> split_src_ranges; + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + if (!ZipModeImage::GeneratePatches(split_tgt_images, split_src_images, split_src_ranges, + argv[optind + 2], debug_dir)) { + return 1; + } + + } else if (!ZipModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { return 1; } } else { diff --git a/applypatch/include/applypatch/imgdiff_image.h b/applypatch/include/applypatch/imgdiff_image.h index 221dd5ab5..9fb844b24 100644 --- a/applypatch/include/applypatch/imgdiff_image.h +++ b/applypatch/include/applypatch/imgdiff_image.h @@ -129,6 +129,9 @@ class PatchChunk { // Return true if raw data size is smaller than the patch size. static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size); + // Update the source start with the new offset within the source range. + void UpdateSourceOffset(const SortedRangeSet& src_range); + static bool WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd); private: @@ -177,6 +180,14 @@ class Image { return chunks_.end(); } + std::vector<ImageChunk>::const_iterator cbegin() const { + return chunks_.cbegin(); + } + + std::vector<ImageChunk>::const_iterator cend() const { + return chunks_.cend(); + } + ImageChunk& operator[](size_t i); const ImageChunk& operator[](size_t i) const; @@ -194,10 +205,18 @@ class Image { class ZipModeImage : public Image { public: - explicit ZipModeImage(bool is_source) : Image(is_source) {} + explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {} bool Initialize(const std::string& filename) override; + // Initialize a dummy ZipModeImage from an existing ImageChunk vector. For src img pieces, we + // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img + // pieces; because for each chunk both the data and their offset within the file are unchanged. + void Initialize(const std::vector<ImageChunk>& chunks, const std::vector<uint8_t>& file_content) { + chunks_ = chunks; + file_content_ = file_content; + } + // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in // fact the whole source file. ImageChunk PseudoSource() const; @@ -216,6 +235,21 @@ class ZipModeImage : public Image { static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, const std::string& patch_name); + // Compute the patch based on the lists of split src and tgt images. Generate patches for each + // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write + // each split src data and patch data into that directory. + static bool GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images, + const std::vector<ZipModeImage>& split_src_images, + const std::vector<SortedRangeSet>& split_src_ranges, + const std::string& patch_name, const std::string& debug_dir); + + // Split the tgt chunks and src chunks based on the size limit. + static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector<ZipModeImage>* split_tgt_images, + std::vector<ZipModeImage>* split_src_images, + std::vector<SortedRangeSet>* split_src_ranges); + private: // Initialize image chunks based on the zip entries. bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle); @@ -223,6 +257,28 @@ class ZipModeImage : public Image { bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry); // Return the real size of the zip file. (omit the trailing zeros that used for alignment) bool GetZipFileSize(size_t* input_file_size); + + static void ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images, + const std::vector<ZipModeImage>& split_src_images, + std::vector<SortedRangeSet>& split_src_ranges, + size_t total_tgt_size); + // Construct the dummy split images based on the chunks info and source ranges; and move them into + // the given vectors. + static void AddSplitImageFromChunkList(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + const SortedRangeSet& split_src_ranges, + const std::vector<ImageChunk>& split_tgt_chunks, + const std::vector<ImageChunk>& split_src_chunks, + std::vector<ZipModeImage>* split_tgt_images, + std::vector<ZipModeImage>* split_src_images); + + // Function that actually iterates the tgt_chunks and makes patches. + static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + std::vector<PatchChunk>* patch_chunks); + + // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit, + // we'll split that entry into several smaller chunks in advance. + size_t limit_; }; class ImageModeImage : public Image { diff --git a/tests/component/imgdiff_test.cpp b/tests/component/imgdiff_test.cpp index bf25aebb0..3163a57cf 100644 --- a/tests/component/imgdiff_test.cpp +++ b/tests/component/imgdiff_test.cpp @@ -16,13 +16,17 @@ #include <stdio.h> +#include <algorithm> #include <string> +#include <tuple> #include <vector> #include <android-base/file.h> #include <android-base/memory.h> +#include <android-base/stringprintf.h> #include <android-base/test_utils.h> #include <applypatch/imgdiff.h> +#include <applypatch/imgdiff_image.h> #include <applypatch/imgpatch.h> #include <gtest/gtest.h> #include <ziparchive/zip_writer.h> @@ -75,15 +79,20 @@ static void verify_patch_header(const std::string& patch, size_t* num_normal, si if (num_deflate != nullptr) *num_deflate = deflate; } -static void verify_patched_image(const std::string& src, const std::string& patch, - const std::string& tgt) { - std::string patched; +static void GenerateTarget(const std::string& src, const std::string& patch, std::string* patched) { + patched->clear(); ASSERT_EQ(0, ApplyImagePatch(reinterpret_cast<const unsigned char*>(src.data()), src.size(), reinterpret_cast<const unsigned char*>(patch.data()), patch.size(), - [&patched](const unsigned char* data, size_t len) { - patched.append(reinterpret_cast<const char*>(data), len); + [&](const unsigned char* data, size_t len) { + patched->append(reinterpret_cast<const char*>(data), len); return len; })); +} + +static void verify_patched_image(const std::string& src, const std::string& patch, + const std::string& tgt) { + std::string patched; + GenerateTarget(src, patch, &patched); ASSERT_EQ(tgt, patched); } @@ -623,3 +632,323 @@ TEST(ImgpatchTest, image_mode_patch_corruption) { reinterpret_cast<const unsigned char*>(patch.data()), patch.size(), [](const unsigned char* /*data*/, size_t len) { return len; })); } + +static void construct_store_entry(const std::vector<std::tuple<std::string, size_t, char>>& info, + ZipWriter* writer) { + for (auto& t : info) { + // Create t(1) blocks of t(2), and write the data to t(0) + ASSERT_EQ(0, writer->StartEntry(std::get<0>(t).c_str(), 0)); + const std::string content(std::get<1>(t) * 4096, std::get<2>(t)); + ASSERT_EQ(0, writer->WriteBytes(content.data(), content.size())); + ASSERT_EQ(0, writer->FinishEntry()); + } +} + +static void construct_deflate_entry(const std::vector<std::tuple<std::string, size_t, size_t>>& info, + ZipWriter* writer, const std::string& data) { + for (auto& t : info) { + // t(0): entry_name; t(1): block offset; t(2) length in blocks. + ASSERT_EQ(0, writer->StartEntry(std::get<0>(t).c_str(), ZipWriter::kCompress)); + ASSERT_EQ(0, writer->WriteBytes(data.data() + std::get<1>(t) * 4096, std::get<2>(t) * 4096)); + ASSERT_EQ(0, writer->FinishEntry()); + } +} + +// Look for the generated source and patch pieces in the debug_dir and generate the target on +// each pair. Concatenate the split target and match against the orignal one. +static void GenerateAndCheckSplitTarget(const std::string& debug_dir, size_t count, + const std::string& tgt) { + std::string patched; + for (size_t i = 0; i < count; i++) { + std::string split_src_path = android::base::StringPrintf("%s/src-%zu", debug_dir.c_str(), i); + std::string split_patch_path = android::base::StringPrintf("%s/patch-%zu", debug_dir.c_str(), i); + + std::string split_src; + std::string split_patch; + ASSERT_TRUE(android::base::ReadFileToString(split_src_path, &split_src)); + ASSERT_TRUE(android::base::ReadFileToString(split_patch_path, &split_patch)); + + std::string split_tgt; + GenerateTarget(split_src, split_patch, &split_tgt); + patched += split_tgt; + } + + // Verify we can get back the original target image. + ASSERT_EQ(tgt, patched); +} + +std::vector<ImageChunk> ConstructImageChunks( + const std::vector<uint8_t>& content, const std::vector<std::tuple<std::string, size_t>>& info) { + std::vector<ImageChunk> chunks; + size_t start = 0; + for (const auto& t : info) { + size_t length = std::get<1>(t); + chunks.emplace_back(CHUNK_NORMAL, start, &content, length, std::get<0>(t)); + start += length; + } + + return chunks; +} + +TEST(ImgdiffTest, zip_mode_split_image_smoke) { + std::vector<uint8_t> content; + content.reserve(4096 * 50); + uint8_t n = 0; + generate_n(back_inserter(content), 4096 * 50, [&n]() { return n++ / 4096; }); + + ZipModeImage tgt_image(false, 4096 * 10); + std::vector<ImageChunk> tgt_chunks = ConstructImageChunks(content, { { "a", 100 }, + { "b", 4096 * 2 }, + { "c", 4096 * 3 }, + { "d", 300 }, + { "e-0", 4096 * 10 }, + { "e-1", 4096 * 5 }, + { "CD", 200 } }); + tgt_image.Initialize(std::move(tgt_chunks), + std::vector<uint8_t>(content.begin(), content.begin() + 82520)); + + tgt_image.DumpChunks(); + + ZipModeImage src_image(true, 4096 * 10); + std::vector<ImageChunk> src_chunks = ConstructImageChunks(content, { { "b", 4096 * 3 }, + { "c-0", 4096 * 10 }, + { "c-1", 4096 * 2 }, + { "a", 4096 * 5 }, + { "e-0", 4096 * 10 }, + { "e-1", 10000 }, + { "CD", 5000 } }); + src_image.Initialize(std::move(src_chunks), + std::vector<uint8_t>(content.begin(), content.begin() + 137880)); + + std::vector<ZipModeImage> split_tgt_images; + std::vector<ZipModeImage> split_src_images; + std::vector<SortedRangeSet> split_src_ranges; + + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + // src_piece 1: a 5 blocks, b 3 blocks + // src_piece 2: c-0 10 blocks + // src_piece 3: d 0 block, e-0 10 blocks + // src_piece 4: e-1 2 blocks; CD 2 blocks + ASSERT_EQ(split_tgt_images.size(), split_src_images.size()); + ASSERT_EQ(static_cast<size_t>(4), split_tgt_images.size()); + + ASSERT_EQ(static_cast<size_t>(1), split_tgt_images[0].NumOfChunks()); + ASSERT_EQ(static_cast<size_t>(12288), split_tgt_images[0][0].DataLengthForPatch()); + ASSERT_EQ("4,0,3,15,20", split_src_ranges[0].ToString()); + + ASSERT_EQ(static_cast<size_t>(1), split_tgt_images[1].NumOfChunks()); + ASSERT_EQ(static_cast<size_t>(12288), split_tgt_images[1][0].DataLengthForPatch()); + ASSERT_EQ("2,3,13", split_src_ranges[1].ToString()); + + ASSERT_EQ(static_cast<size_t>(1), split_tgt_images[2].NumOfChunks()); + ASSERT_EQ(static_cast<size_t>(40960), split_tgt_images[2][0].DataLengthForPatch()); + ASSERT_EQ("2,20,30", split_src_ranges[2].ToString()); + + ASSERT_EQ(static_cast<size_t>(1), split_tgt_images[3].NumOfChunks()); + ASSERT_EQ(static_cast<size_t>(16984), split_tgt_images[3][0].DataLengthForPatch()); + ASSERT_EQ("2,30,34", split_src_ranges[3].ToString()); +} + +TEST(ImgdiffTest, zip_mode_store_large_apk) { + // Construct src and tgt zip files with limit = 10 blocks. + // src tgt + // 12 blocks 'd' 3 blocks 'a' + // 8 blocks 'c' 3 blocks 'b' + // 3 blocks 'b' 8 blocks 'c' (exceeds limit) + // 3 blocks 'a' 12 blocks 'd' (exceeds limit) + // 3 blocks 'e' + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + construct_store_entry( + { { "a", 3, 'a' }, { "b", 3, 'b' }, { "c", 8, 'c' }, { "d", 12, 'd' }, { "e", 3, 'e' } }, + &tgt_writer); + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + construct_store_entry({ { "d", 12, 'd' }, { "c", 8, 'c' }, { "b", 3, 'b' }, { "a", 3, 'a' } }, + &src_writer); + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + // Compute patch. + TemporaryFile patch_file; + TemporaryDir debug_dir; + std::vector<const char*> args = { + "imgdiff", "-z", "--block-limit=10", android::base::StringPrintf( + "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path, + }; + ASSERT_EQ(0, imgdiff(args.size(), args.data())); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 4 pieces of patch.(Rougly 3'a',3'b'; 8'c'; 10'd'; 2'd'3'e') + GenerateAndCheckSplitTarget(debug_dir.path, 4, tgt); +} + +TEST(ImgdiffTest, zip_mode_deflate_large_apk) { + // Generate 50 blocks of random data. + std::string random_data; + random_data.reserve(4096 * 50); + generate_n(back_inserter(random_data), 4096 * 50, []() { return rand() % 256; }); + + // Construct src and tgt zip files with limit = 10 blocks. + // src tgt + // 22 blocks, "d" 4 blocks, "a" + // 5 blocks, "b" 4 blocks, "b" + // 3 blocks, "a" 7 blocks, "c" (exceeds limit) + // 8 blocks, "c" 20 blocks, "d" (exceeds limit) + // 1 block, "f" 2 blocks, "e" + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + + construct_deflate_entry( + { { "a", 0, 4 }, { "b", 5, 4 }, { "c", 12, 8 }, { "d", 21, 20 }, { "e", 45, 2 }, + { "f", 48, 1 } }, &tgt_writer, random_data); + + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + + construct_deflate_entry( + { { "d", 21, 22 }, { "b", 5, 5 }, { "a", 0, 3 }, { "g", 9, 1 }, { "c", 11, 8 }, + { "f", 45, 1 } }, &src_writer, random_data); + + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + ZipModeImage src_image(true, 10 * 4096); + ZipModeImage tgt_image(false, 10 * 4096); + ASSERT_TRUE(src_image.Initialize(src_file.path)); + ASSERT_TRUE(tgt_image.Initialize(tgt_file.path)); + ASSERT_TRUE(ZipModeImage::CheckAndProcessChunks(&tgt_image, &src_image)); + + src_image.DumpChunks(); + tgt_image.DumpChunks(); + + std::vector<ZipModeImage> split_tgt_images; + std::vector<ZipModeImage> split_src_images; + std::vector<SortedRangeSet> split_src_ranges; + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + // src_piece 1: a 3 blocks, b 5 blocks + // src_piece 2: c 8 blocks + // src_piece 3: d-0 10 block + // src_piece 4: d-1 10 blocks + // src_piece 5: e 1 block, CD + ASSERT_EQ(split_tgt_images.size(), split_src_images.size()); + ASSERT_EQ(static_cast<size_t>(5), split_tgt_images.size()); + + ASSERT_EQ(static_cast<size_t>(2), split_src_images[0].NumOfChunks()); + ASSERT_EQ("a", split_src_images[0][0].GetEntryName()); + ASSERT_EQ("b", split_src_images[0][1].GetEntryName()); + + ASSERT_EQ(static_cast<size_t>(1), split_src_images[1].NumOfChunks()); + ASSERT_EQ("c", split_src_images[1][0].GetEntryName()); + + ASSERT_EQ(static_cast<size_t>(0), split_src_images[2].NumOfChunks()); + ASSERT_EQ(static_cast<size_t>(0), split_src_images[3].NumOfChunks()); + ASSERT_EQ(static_cast<size_t>(0), split_src_images[4].NumOfChunks()); + + // Compute patch. + TemporaryFile patch_file; + TemporaryDir debug_dir; + ASSERT_TRUE(ZipModeImage::GeneratePatches(split_tgt_images, split_src_images, split_src_ranges, + patch_file.path, debug_dir.path)); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 5 pieces of patch. ["a","b"; "c"; "d-0"; "d-1"; "e"] + GenerateAndCheckSplitTarget(debug_dir.path, 5, tgt); +} + +TEST(ImgdiffTest, zip_mode_no_match_source) { + // Generate 20 blocks of random data. + std::string random_data; + random_data.reserve(4096 * 20); + generate_n(back_inserter(random_data), 4096 * 20, []() { return rand() % 256; }); + + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + + construct_deflate_entry({ { "a", 0, 4 }, { "b", 5, 5 }, { "c", 11, 5 } }, &tgt_writer, + random_data); + + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + // We don't have a matching source entry. + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + construct_store_entry({ { "d", 1, 'd' } }, &src_writer); + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + // Compute patch. + TemporaryFile patch_file; + TemporaryDir debug_dir; + std::vector<const char*> args = { + "imgdiff", "-z", "--block-limit=10", android::base::StringPrintf( + "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path, + }; + ASSERT_EQ(0, imgdiff(args.size(), args.data())); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 1 pieces of patch due to no matching source entry. + GenerateAndCheckSplitTarget(debug_dir.path, 1, tgt); +} + +TEST(ImgdiffTest, zip_mode_large_enough_limit) { + // Generate 20 blocks of random data. + std::string random_data; + random_data.reserve(4096 * 20); + generate_n(back_inserter(random_data), 4096 * 20, []() { return rand() % 256; }); + + TemporaryFile tgt_file; + FILE* tgt_file_ptr = fdopen(tgt_file.fd, "wb"); + ZipWriter tgt_writer(tgt_file_ptr); + + construct_deflate_entry({ { "a", 0, 10 }, { "b", 10, 5 } }, &tgt_writer, random_data); + + ASSERT_EQ(0, tgt_writer.Finish()); + ASSERT_EQ(0, fclose(tgt_file_ptr)); + + // Construct 10 blocks of source. + TemporaryFile src_file; + FILE* src_file_ptr = fdopen(src_file.fd, "wb"); + ZipWriter src_writer(src_file_ptr); + construct_deflate_entry({ { "a", 1, 10 } }, &src_writer, random_data); + ASSERT_EQ(0, src_writer.Finish()); + ASSERT_EQ(0, fclose(src_file_ptr)); + + // Compute patch with a limit of 20 blocks. + TemporaryFile patch_file; + TemporaryDir debug_dir; + std::vector<const char*> args = { + "imgdiff", "-z", "--block-limit=20", android::base::StringPrintf( + "--debug-dir=%s", debug_dir.path).c_str(), src_file.path, tgt_file.path, patch_file.path, + }; + ASSERT_EQ(0, imgdiff(args.size(), args.data())); + + std::string tgt; + ASSERT_TRUE(android::base::ReadFileToString(tgt_file.path, &tgt)); + + // Expect 1 pieces of patch since limit is larger than the zip file size. + GenerateAndCheckSplitTarget(debug_dir.path, 1, tgt); +} |