From 6b03ba7902cd97e305d9354fd6a733ab0d2f10fe Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Wed, 19 Jul 2017 14:16:30 -0700 Subject: Refactor the imgdiff This helps us to add a new mode to handle large APKs in the follow up CL. Changes include: 1. Create a new interface class 'Image' 1. Create subclasses 'ZipModeImage' and 'ImageModeImage' and move the related functions there. Bug: 63542719 Test: recovery_component_test passes Change-Id: I7729b0ba39b19a9c84811636a60dd0a0b1acc2f0 --- applypatch/imgdiff.cpp | 959 +++++++++++++++++++++++++++++-------------------- 1 file changed, 567 insertions(+), 392 deletions(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index fc240644f..880265260 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -196,7 +196,8 @@ class ImageChunk { size_t DataLengthForPatch() const; void Dump() const { - printf("type %d start %zu len %zu\n", type_, start_, DataLengthForPatch()); + printf("type: %d, start: %zu, len: %zu, name: %s\n", type_, start_, DataLengthForPatch(), + entry_name_.c_str()); } void SetSourceInfo(const ImageChunk& other); @@ -211,7 +212,7 @@ class ImageChunk { size_t GetHeaderSize(size_t patch_size) const; // Return the offset of the next patch into the patch data. - size_t WriteHeaderToFd(int fd, const std::vector& patch, size_t offset); + size_t WriteHeaderToFd(int fd, const std::vector& patch, size_t offset) const; /* * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob @@ -233,6 +234,14 @@ class ImageChunk { bool IsAdjacentNormal(const ImageChunk& other) const; void MergeAdjacentNormal(const ImageChunk& other); + /* + * Compute a bsdiff patch between |this| and the input source chunks. + * Store the result in the patch_data. + * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used + * repeatedly, pass nullptr if not needed. 
+ */ + bool MakePatch(const ImageChunk& src, std::vector* patch_data, saidx_t** bsdiff_cache); + private: int type_; // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW size_t start_; // offset of chunk in the original input file @@ -322,7 +331,7 @@ bool ImageChunk::ChangeChunkToRaw(size_t patch_size) { void ImageChunk::ChangeDeflateChunkToNormal() { if (type_ != CHUNK_DEFLATE) return; type_ = CHUNK_NORMAL; - entry_name_.clear(); + // No need to clear the entry name. uncompressed_data_.clear(); } @@ -345,7 +354,7 @@ size_t ImageChunk::GetHeaderSize(size_t patch_size) const { } } -size_t ImageChunk::WriteHeaderToFd(int fd, const std::vector& patch, size_t offset) { +size_t ImageChunk::WriteHeaderToFd(int fd, const std::vector& patch, size_t offset) const { Write4(fd, type_); switch (type_) { case CHUNK_NORMAL: @@ -393,6 +402,68 @@ void ImageChunk::MergeAdjacentNormal(const ImageChunk& other) { raw_data_len_ = raw_data_len_ + other.raw_data_len_; } +bool ImageChunk::MakePatch(const ImageChunk& src, std::vector* patch_data, + saidx_t** bsdiff_cache) { + if (ChangeChunkToRaw(0)) { + size_t patch_size = DataLengthForPatch(); + patch_data->resize(patch_size); + std::copy(DataForPatch(), DataForPatch() + patch_size, patch_data->begin()); + return true; + } + +#if defined(__ANDROID__) + char ptemp[] = "/data/local/tmp/imgdiff-patch-XXXXXX"; +#else + char ptemp[] = "/tmp/imgdiff-patch-XXXXXX"; +#endif + + int fd = mkstemp(ptemp); + if (fd == -1) { + printf("MakePatch failed to create a temporary file: %s\n", strerror(errno)); + return false; + } + close(fd); + + int r = bsdiff::bsdiff(src.DataForPatch(), src.DataLengthForPatch(), DataForPatch(), + DataLengthForPatch(), ptemp, bsdiff_cache); + if (r != 0) { + printf("bsdiff() failed: %d\n", r); + return false; + } + + android::base::unique_fd patch_fd(open(ptemp, O_RDONLY)); + if (patch_fd == -1) { + printf("failed to open %s: %s\n", ptemp, strerror(errno)); + return false; + } + struct stat st; + if (fstat(patch_fd, &st) != 0) { + 
printf("failed to stat patch file %s: %s\n", ptemp, strerror(errno)); + return false; + } + + size_t sz = static_cast(st.st_size); + // Change the chunk type to raw if the patch takes less space that way. + if (ChangeChunkToRaw(sz)) { + unlink(ptemp); + size_t patch_size = DataLengthForPatch(); + patch_data->resize(patch_size); + std::copy(DataForPatch(), DataForPatch() + patch_size, patch_data->begin()); + return true; + } + patch_data->resize(sz); + if (!android::base::ReadFully(patch_fd, patch_data->data(), sz)) { + printf("failed to read \"%s\" %s\n", ptemp, strerror(errno)); + unlink(ptemp); + return false; + } + + unlink(ptemp); + SetSourceInfo(src); + + return true; +} + bool ImageChunk::ReconstructDeflateChunk() { if (type_ != CHUNK_DEFLATE) { printf("attempt to reconstruct non-deflate chunk\n"); @@ -458,195 +529,467 @@ bool ImageChunk::TryReconstruction(int level) { return true; } -// EOCD record -// offset 0: signature 0x06054b50, 4 bytes -// offset 4: number of this disk, 2 bytes -// ... -// offset 20: comment length, 2 bytes -// offset 22: comment, n bytes -static bool GetZipFileSize(const std::vector& zip_file, size_t* input_file_size) { - if (zip_file.size() < 22) { - printf("file is too small to be a zip file\n"); - return false; +// Interface for zip_mode and image_mode images. We initialize the image from an input file and +// split the file content into a list of image chunks. +class Image { + public: + explicit Image(bool is_source) : is_source_(is_source) {} + + virtual ~Image() {} + + // Create a list of image chunks from input file. + virtual bool Initialize(const std::string& filename) = 0; + + // Look for runs of adjacent normal chunks and compress them down into a single chunk. (Such + // runs can be produced when deflate chunks are changed to normal chunks.) + void MergeAdjacentNormalChunks(); + + // In zip mode, find the matching deflate source chunk by entry name. Search for normal chunks + // also if |find_normal| is true. 
+ ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false); + + // Write the contents of |patch_data| to |patch_fd|. + bool WritePatchDataToFd(const std::vector>& patch_data, int patch_fd) const; + + void DumpChunks() const; + + // Non const iterators to access the stored ImageChunks. + std::vector::iterator begin() { + return chunks_.begin(); } - // Look for End of central directory record of the zip file, and calculate the actual - // zip_file size. - for (int i = zip_file.size() - 22; i >= 0; i--) { - if (zip_file[i] == 0x50) { - if (get_unaligned(&zip_file[i]) == 0x06054b50) { - // double-check: this archive consists of a single "disk". - CHECK_EQ(get_unaligned(&zip_file[i + 4]), 0); + std::vector::iterator end() { + return chunks_.end(); + } + // Return a pointer to the ith ImageChunk. + ImageChunk* Get(size_t i) { + CHECK_LT(i, chunks_.size()); + return &chunks_[i]; + } - uint16_t comment_length = get_unaligned(&zip_file[i + 20]); - size_t file_size = i + 22 + comment_length; - CHECK_LE(file_size, zip_file.size()); - *input_file_size = file_size; - return true; + size_t NumOfChunks() const { + return chunks_.size(); + } + + protected: + bool ReadFile(const std::string& filename, std::vector* file_content); + + bool is_source_; // True if it's for source chunks. + std::vector chunks_; // Internal storage of ImageChunk. + std::vector file_content_; // Store the whole input file in memory. +}; + +void Image::MergeAdjacentNormalChunks() { + size_t merged_last = 0, cur = 0; + while (cur < chunks_.size()) { + // Look for normal chunks adjacent to the current one. If such chunk exists, extend the + // length of the current normal chunk. 
+ size_t to_check = cur + 1; + while (to_check < chunks_.size() && chunks_[cur].IsAdjacentNormal(chunks_[to_check])) { + chunks_[cur].MergeAdjacentNormal(chunks_[to_check]); + to_check++; + } + + if (merged_last != cur) { + chunks_[merged_last] = std::move(chunks_[cur]); + } + merged_last++; + cur = to_check; + } + if (merged_last < chunks_.size()) { + chunks_.erase(chunks_.begin() + merged_last, chunks_.end()); + } +} + +ImageChunk* Image::FindChunkByName(const std::string& name, bool find_normal) { + if (name.empty()) { + return nullptr; + } + for (auto& chunk : chunks_) { + if ((chunk.GetType() == CHUNK_DEFLATE || find_normal) && chunk.GetEntryName() == name) { + return &chunk; + } + } + return nullptr; +} + +bool Image::WritePatchDataToFd(const std::vector>& patch_data, + int patch_fd) const { + // Figure out how big the imgdiff file header is going to be, so that we can correctly compute + // the offset of each bsdiff patch within the file. + CHECK_EQ(chunks_.size(), patch_data.size()); + size_t total_header_size = 12; + for (size_t i = 0; i < chunks_.size(); ++i) { + total_header_size += chunks_[i].GetHeaderSize(patch_data[i].size()); + } + + size_t offset = total_header_size; + + // Write out the headers. + if (!android::base::WriteStringToFd("IMGDIFF2", patch_fd)) { + printf("failed to write \"IMGDIFF2\": %s\n", strerror(errno)); + return false; + } + Write4(patch_fd, static_cast(chunks_.size())); + for (size_t i = 0; i < chunks_.size(); ++i) { + printf("chunk %zu: ", i); + offset = chunks_[i].WriteHeaderToFd(patch_fd, patch_data[i], offset); + } + + // Append each chunk's bsdiff patch, in order. + for (size_t i = 0; i < chunks_.size(); ++i) { + if (chunks_[i].GetType() != CHUNK_RAW) { + if (!android::base::WriteFully(patch_fd, patch_data[i].data(), patch_data[i].size())) { + printf("failed to write %zu bytes patch for chunk %zu\n", patch_data[i].size(), i); + return false; } } } - // EOCD not found, this file is likely not a valid zip file. 
- return false; + return true; +} + +void Image::DumpChunks() const { + std::string type = is_source_ ? "source" : "target"; + printf("Dumping chunks for %s\n", type.c_str()); + for (size_t i = 0; i < chunks_.size(); ++i) { + printf("chunk %zu: ", i); + chunks_[i].Dump(); + } } -static bool ReadZip(const char* filename, std::vector* chunks, - std::vector* zip_file, bool include_pseudo_chunk) { - CHECK(chunks != nullptr && zip_file != nullptr); +bool Image::ReadFile(const std::string& filename, std::vector* file_content) { + CHECK(file_content != nullptr); - android::base::unique_fd fd(open(filename, O_RDONLY)); + android::base::unique_fd fd(open(filename.c_str(), O_RDONLY)); if (fd == -1) { - printf("failed to open \"%s\" %s\n", filename, strerror(errno)); + printf("failed to open \"%s\" %s\n", filename.c_str(), strerror(errno)); return false; } struct stat st; if (fstat(fd, &st) != 0) { - printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); + printf("failed to stat \"%s\": %s\n", filename.c_str(), strerror(errno)); return false; } size_t sz = static_cast(st.st_size); - zip_file->resize(sz); - if (!android::base::ReadFully(fd, zip_file->data(), sz)) { - printf("failed to read \"%s\" %s\n", filename, strerror(errno)); + file_content->resize(sz); + if (!android::base::ReadFully(fd, file_content->data(), sz)) { + printf("failed to read \"%s\" %s\n", filename.c_str(), strerror(errno)); return false; } fd.reset(); - // Trim the trailing zeros before we pass the file to ziparchive handler. + return true; +} + +class ZipModeImage : public Image { + public: + explicit ZipModeImage(bool is_source) : Image(is_source) {} + + bool Initialize(const std::string& filename) override; + + const ImageChunk& PseudoSource() const { + CHECK(is_source_); + CHECK(pseudo_source_ != nullptr); + return *pseudo_source_; + } + + // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if + // src and tgt are identical. 
+ static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image); + + // Compute the patches against the input image, and write the data into |patch_name|. + static bool GeneratePatches(ZipModeImage* tgt_image, ZipModeImage* src_image, + const std::string& patch_name); + + private: + // Initialize image chunks based on the zip entries. + bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle); + // Add the a zip entry to the list. + bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry); + // Return the real size of the zip file. (omit the trailing zeros that used for alignment) + bool GetZipFileSize(size_t* input_file_size); + + // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in + // fact the whole source file. + std::unique_ptr pseudo_source_; +}; + +bool ZipModeImage::Initialize(const std::string& filename) { + if (!ReadFile(filename, &file_content_)) { + return false; + } + + // Omit the trailing zeros before we pass the file to ziparchive handler. size_t zipfile_size; - if (!GetZipFileSize(*zip_file, &zipfile_size)) { - printf("failed to parse the actual size of %s\n", filename); + if (!GetZipFileSize(&zipfile_size)) { + printf("failed to parse the actual size of %s\n", filename.c_str()); return false; } ZipArchiveHandle handle; - int err = OpenArchiveFromMemory(zip_file->data(), zipfile_size, filename, &handle); + int err = OpenArchiveFromMemory(const_cast(file_content_.data()), zipfile_size, + filename.c_str(), &handle); if (err != 0) { - printf("failed to open zip file %s: %s\n", filename, ErrorCodeString(err)); + printf("failed to open zip file %s: %s\n", filename.c_str(), ErrorCodeString(err)); CloseArchive(handle); return false; } - // Create a list of deflated zip entries, sorted by offset. 
- std::vector> temp_entries; + if (is_source_) { + pseudo_source_ = std::make_unique(CHUNK_NORMAL, 0, &file_content_, zipfile_size); + } + if (!InitializeChunks(filename, handle)) { + CloseArchive(handle); + return false; + } + + CloseArchive(handle); + return true; +} + +// Iterate the zip entries and compose the image chunks accordingly. +bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandle handle) { void* cookie; int ret = StartIteration(handle, &cookie, nullptr, nullptr); if (ret != 0) { - printf("failed to iterate over entries in %s: %s\n", filename, ErrorCodeString(ret)); - CloseArchive(handle); + printf("failed to iterate over entries in %s: %s\n", filename.c_str(), ErrorCodeString(ret)); return false; } + // Create a list of deflated zip entries, sorted by offset. + std::vector> temp_entries; ZipString name; ZipEntry entry; while ((ret = Next(cookie, &entry, &name)) == 0) { if (entry.method == kCompressDeflated) { - std::string entryname(name.name, name.name + name.name_length); - temp_entries.push_back(std::make_pair(entryname, entry)); + std::string entry_name(name.name, name.name + name.name_length); + temp_entries.emplace_back(entry_name, entry); } } if (ret != -1) { printf("Error while iterating over zip entries: %s\n", ErrorCodeString(ret)); - CloseArchive(handle); return false; } std::sort(temp_entries.begin(), temp_entries.end(), - [](auto& entry1, auto& entry2) { - return entry1.second.offset < entry2.second.offset; - }); + [](auto& entry1, auto& entry2) { return entry1.second.offset < entry2.second.offset; }); EndIteration(cookie); - if (include_pseudo_chunk) { - chunks->emplace_back(CHUNK_NORMAL, 0, zip_file, zip_file->size()); + // For source chunks, we don't need to compose chunks for the metadata. 
+ if (is_source_) { + for (auto& entry : temp_entries) { + if (!AddZipEntryToChunks(handle, entry.first, &entry.second)) { + printf("Failed to add %s to source chunks\n", entry.first.c_str()); + return false; + } + } + return true; } + // For target chunks, add the deflate entries as CHUNK_DEFLATE and the contents between two + // deflate entries as CHUNK_NORMAL. size_t pos = 0; size_t nextentry = 0; - while (pos < zip_file->size()) { + while (pos < file_content_.size()) { if (nextentry < temp_entries.size() && static_cast(pos) == temp_entries[nextentry].second.offset) { - // compose the next deflate chunk. - std::string entryname = temp_entries[nextentry].first; - size_t uncompressed_len = temp_entries[nextentry].second.uncompressed_length; - std::vector uncompressed_data(uncompressed_len); - if ((ret = ExtractToMemory(handle, &temp_entries[nextentry].second, uncompressed_data.data(), - uncompressed_len)) != 0) { - printf("failed to extract %s with size %zu: %s\n", entryname.c_str(), uncompressed_len, - ErrorCodeString(ret)); - CloseArchive(handle); + // Add the next zip entry. + std::string entry_name = temp_entries[nextentry].first; + if (!AddZipEntryToChunks(handle, entry_name, &temp_entries[nextentry].second)) { + printf("Failed to add %s to target chunks\n", entry_name.c_str()); return false; } - size_t compressed_len = temp_entries[nextentry].second.compressed_length; - ImageChunk curr(CHUNK_DEFLATE, pos, zip_file, compressed_len); - curr.SetEntryName(std::move(entryname)); - curr.SetUncompressedData(std::move(uncompressed_data)); - chunks->push_back(curr); - - pos += compressed_len; + pos += temp_entries[nextentry].second.compressed_length; ++nextentry; continue; } - // Use a normal chunk to take all the data up to the start of the next deflate section. + // Use a normal chunk to take all the data up to the start of the next entry. 
size_t raw_data_len; if (nextentry < temp_entries.size()) { raw_data_len = temp_entries[nextentry].second.offset - pos; } else { - raw_data_len = zip_file->size() - pos; + raw_data_len = file_content_.size() - pos; } - chunks->emplace_back(CHUNK_NORMAL, pos, zip_file, raw_data_len); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, raw_data_len); pos += raw_data_len; } - CloseArchive(handle); return true; } -// Read the given file and break it up into chunks, and putting the data in to a vector. -static bool ReadImage(const char* filename, std::vector* chunks, - std::vector* img) { - CHECK(chunks != nullptr && img != nullptr); +bool ZipModeImage::AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, + ZipEntry* entry) { + size_t compressed_len = entry->compressed_length; + if (entry->method == kCompressDeflated) { + size_t uncompressed_len = entry->uncompressed_length; + std::vector uncompressed_data(uncompressed_len); + int ret = ExtractToMemory(handle, entry, uncompressed_data.data(), uncompressed_len); + if (ret != 0) { + printf("failed to extract %s with size %zu: %s\n", entry_name.c_str(), uncompressed_len, + ErrorCodeString(ret)); + return false; + } + ImageChunk curr(CHUNK_DEFLATE, entry->offset, &file_content_, compressed_len); + curr.SetEntryName(entry_name); + curr.SetUncompressedData(std::move(uncompressed_data)); + chunks_.push_back(curr); + } else { + ImageChunk curr(CHUNK_NORMAL, entry->offset, &file_content_, compressed_len); + curr.SetEntryName(entry_name); + chunks_.push_back(curr); + } - android::base::unique_fd fd(open(filename, O_RDONLY)); - if (fd == -1) { - printf("failed to open \"%s\" %s\n", filename, strerror(errno)); + return true; +} + +// EOCD record +// offset 0: signature 0x06054b50, 4 bytes +// offset 4: number of this disk, 2 bytes +// ... 
+// offset 20: comment length, 2 bytes +// offset 22: comment, n bytes +bool ZipModeImage::GetZipFileSize(size_t* input_file_size) { + if (file_content_.size() < 22) { + printf("file is too small to be a zip file\n"); return false; } - struct stat st; - if (fstat(fd, &st) != 0) { - printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); + + // Look for End of central directory record of the zip file, and calculate the actual + // zip_file size. + for (int i = file_content_.size() - 22; i >= 0; i--) { + if (file_content_[i] == 0x50) { + if (get_unaligned(&file_content_[i]) == 0x06054b50) { + // double-check: this archive consists of a single "disk". + CHECK_EQ(get_unaligned(&file_content_[i + 4]), 0); + + uint16_t comment_length = get_unaligned(&file_content_[i + 20]); + size_t file_size = i + 22 + comment_length; + CHECK_LE(file_size, file_content_.size()); + *input_file_size = file_size; + return true; + } + } + } + + // EOCD not found, this file is likely not a valid zip file. + return false; +} + +bool ZipModeImage::CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image) { + for (auto& tgt_chunk : *tgt_image) { + if (tgt_chunk.GetType() != CHUNK_DEFLATE) { + continue; + } + + ImageChunk* src_chunk = src_image->FindChunkByName(tgt_chunk.GetEntryName()); + if (src_chunk == nullptr) { + tgt_chunk.ChangeDeflateChunkToNormal(); + } else if (tgt_chunk == *src_chunk) { + // If two deflate chunks are identical (eg, the kernel has not changed between two builds), + // treat them as normal chunks. This makes applypatch much faster -- it can apply a trivial + // patch to the compressed data, rather than uncompressing and recompressing to apply the + // trivial patch to the uncompressed data. + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk->ChangeDeflateChunkToNormal(); + } else if (!tgt_chunk.ReconstructDeflateChunk()) { + // We cannot recompress the data and get exactly the same bits as are in the input target + // image. 
Treat the chunk as a normal non-deflated chunk. + printf("failed to reconstruct target deflate chunk [%s]; treating as normal\n", + tgt_chunk.GetEntryName().c_str()); + + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk->ChangeDeflateChunkToNormal(); + } + } + + return true; +} + +bool ZipModeImage::GeneratePatches(ZipModeImage* tgt_image, ZipModeImage* src_image, + const std::string& patch_name) { + // For zips, we only need merge normal chunks for the target: deflated chunks are matched via + // filename, and normal chunks are patched using the entire source file as the source. + tgt_image->MergeAdjacentNormalChunks(); + tgt_image->DumpChunks(); + + printf("Construct patches for %zu chunks...\n", tgt_image->NumOfChunks()); + std::vector> patch_data(tgt_image->NumOfChunks()); + + saidx_t* bsdiff_cache = nullptr; + size_t i = 0; + for (auto& tgt_chunk : *tgt_image) { + ImageChunk* src_chunk = (tgt_chunk.GetType() != CHUNK_DEFLATE) + ? nullptr + : src_image->FindChunkByName(tgt_chunk.GetEntryName()); + + const auto& src_ref = (src_chunk == nullptr) ? src_image->PseudoSource() : *src_chunk; + saidx_t** bsdiff_cache_ptr = (src_chunk == nullptr) ? 
&bsdiff_cache : nullptr; + + if (!tgt_chunk.MakePatch(src_ref, &patch_data[i], bsdiff_cache_ptr)) { + printf("Failed to generate patch, name: %s\n", tgt_chunk.GetEntryName().c_str()); + return false; + } + + printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data[i].size(), + tgt_chunk.GetRawDataLength()); + i++; + } + free(bsdiff_cache); + + android::base::unique_fd patch_fd( + open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + if (patch_fd == -1) { + printf("failed to open \"%s\": %s\n", patch_name.c_str(), strerror(errno)); return false; } - size_t sz = static_cast(st.st_size); - img->resize(sz); - if (!android::base::ReadFully(fd, img->data(), sz)) { - printf("failed to read \"%s\" %s\n", filename, strerror(errno)); + return tgt_image->WritePatchDataToFd(patch_data, patch_fd); +} + +class ImageModeImage : public Image { + public: + explicit ImageModeImage(bool is_source) : Image(is_source) {} + + // Initialize the image chunks list by searching the magic numbers in an image file. + bool Initialize(const std::string& filename) override; + + // In Image Mode, verify that the source and target images have the same chunk structure (ie, the + // same sequence of deflate and normal chunks). + static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image); + + // In image mode, generate patches against the given source chunks and bonus_data; write the + // result to |patch_name|. 
+ static bool GeneratePatches(ImageModeImage* tgt_image, ImageModeImage* src_image, + const std::vector& bonus_data, const std::string& patch_name); +}; + +bool ImageModeImage::Initialize(const std::string& filename) { + if (!ReadFile(filename, &file_content_)) { return false; } + size_t sz = file_content_.size(); size_t pos = 0; - while (pos < sz) { // 0x00 no header flags, 0x08 deflate compression, 0x1f8b gzip magic number - if (sz - pos >= 4 && get_unaligned(img->data() + pos) == 0x00088b1f) { + if (sz - pos >= 4 && get_unaligned(file_content_.data() + pos) == 0x00088b1f) { // 'pos' is the offset of the start of a gzip chunk. size_t chunk_offset = pos; // The remaining data is too small to be a gzip chunk; treat them as a normal chunk. if (sz - pos < GZIP_HEADER_LEN + GZIP_FOOTER_LEN) { - chunks->emplace_back(CHUNK_NORMAL, pos, img, sz - pos); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, sz - pos); break; } // We need three chunks for the deflated image in total, one normal chunk for the header, // one deflated chunk for the body, and another normal chunk for the footer. - chunks->emplace_back(CHUNK_NORMAL, pos, img, GZIP_HEADER_LEN); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, GZIP_HEADER_LEN); pos += GZIP_HEADER_LEN; // We must decompress this chunk in order to discover where it ends, and so we can update @@ -657,7 +1000,7 @@ static bool ReadImage(const char* filename, std::vector* chunks, strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.avail_in = sz - pos; - strm.next_in = img->data() + pos; + strm.next_in = file_content_.data() + pos; // -15 means we are decoding a 'raw' deflate stream; zlib will // not expect zlib headers. 
@@ -700,22 +1043,22 @@ static bool ReadImage(const char* filename, std::vector* chunks, printf("Warning: invalid footer position; treating as a nomal chunk\n"); continue; } - size_t footer_size = get_unaligned(img->data() + footer_index); + size_t footer_size = get_unaligned(file_content_.data() + footer_index); if (footer_size != uncompressed_len) { printf("Warning: footer size %zu != decompressed size %zu; treating as a nomal chunk\n", footer_size, uncompressed_len); continue; } - ImageChunk body(CHUNK_DEFLATE, pos, img, raw_data_len); + ImageChunk body(CHUNK_DEFLATE, pos, &file_content_, raw_data_len); uncompressed_data.resize(uncompressed_len); body.SetUncompressedData(std::move(uncompressed_data)); - chunks->push_back(body); + chunks_.push_back(body); pos += raw_data_len; // create a normal chunk for the footer - chunks->emplace_back(CHUNK_NORMAL, pos, img, GZIP_FOOTER_LEN); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, GZIP_FOOTER_LEN); pos += GZIP_FOOTER_LEN; } else { @@ -726,12 +1069,12 @@ static bool ReadImage(const char* filename, std::vector* chunks, size_t data_len = 0; while (data_len + pos < sz) { if (data_len + pos + 4 <= sz && - get_unaligned(img->data() + pos + data_len) == 0x00088b1f) { + get_unaligned(file_content_.data() + pos + data_len) == 0x00088b1f) { break; } data_len++; } - chunks->emplace_back(CHUNK_NORMAL, pos, img, data_len); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, data_len); pos += data_len; } @@ -740,346 +1083,178 @@ static bool ReadImage(const char* filename, std::vector* chunks, return true; } -/* - * Given source and target chunks, compute a bsdiff patch between them. - * Store the result in the patch_data. - * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk - * is used repeatedly, pass nullptr if not needed. 
- */ -static bool MakePatch(const ImageChunk* src, ImageChunk* tgt, std::vector* patch_data, - saidx_t** bsdiff_cache) { - if (tgt->ChangeChunkToRaw(0)) { - size_t patch_size = tgt->DataLengthForPatch(); - patch_data->resize(patch_size); - std::copy(tgt->DataForPatch(), tgt->DataForPatch() + patch_size, patch_data->begin()); - return true; - } - -#if defined(__ANDROID__) - char ptemp[] = "/data/local/tmp/imgdiff-patch-XXXXXX"; -#else - char ptemp[] = "/tmp/imgdiff-patch-XXXXXX"; -#endif - - int fd = mkstemp(ptemp); - if (fd == -1) { - printf("MakePatch failed to create a temporary file: %s\n", strerror(errno)); +// In Image Mode, verify that the source and target images have the same chunk structure (ie, the +// same sequence of deflate and normal chunks). +bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image) { + // In image mode, merge the gzip header and footer in with any adjacent normal chunks. + tgt_image->MergeAdjacentNormalChunks(); + src_image->MergeAdjacentNormalChunks(); + + if (tgt_image->NumOfChunks() != src_image->NumOfChunks()) { + printf("source and target don't have same number of chunks!\n"); + tgt_image->DumpChunks(); + src_image->DumpChunks(); return false; } - close(fd); - - int r = bsdiff::bsdiff(src->DataForPatch(), src->DataLengthForPatch(), tgt->DataForPatch(), - tgt->DataLengthForPatch(), ptemp, bsdiff_cache); - if (r != 0) { - printf("bsdiff() failed: %d\n", r); - return false; + for (size_t i = 0; i < tgt_image->NumOfChunks(); ++i) { + if (tgt_image->Get(i)->GetType() != src_image->Get(i)->GetType()) { + printf("source and target don't have same chunk structure! 
(chunk %zu)\n", i); + tgt_image->DumpChunks(); + src_image->DumpChunks(); + return false; + } } - android::base::unique_fd patch_fd(open(ptemp, O_RDONLY)); - if (patch_fd == -1) { - printf("failed to open %s: %s\n", ptemp, strerror(errno)); - return false; - } - struct stat st; - if (fstat(patch_fd, &st) != 0) { - printf("failed to stat patch file %s: %s\n", ptemp, strerror(errno)); - return false; - } + for (size_t i = 0; i < tgt_image->NumOfChunks(); ++i) { + auto& tgt_chunk = *tgt_image->Get(i); + auto& src_chunk = *src_image->Get(i); + if (tgt_chunk.GetType() != CHUNK_DEFLATE) { + continue; + } - size_t sz = static_cast(st.st_size); - // Change the chunk type to raw if the patch takes less space that way. - if (tgt->ChangeChunkToRaw(sz)) { - unlink(ptemp); - size_t patch_size = tgt->DataLengthForPatch(); - patch_data->resize(patch_size); - std::copy(tgt->DataForPatch(), tgt->DataForPatch() + patch_size, patch_data->begin()); - return true; + // Confirm that we can recompress the data and get exactly the same bits as are in the + // input target image. + if (!tgt_chunk.ReconstructDeflateChunk()) { + printf("failed to reconstruct target deflate chunk %zu [%s]; treating as normal\n", i, + tgt_chunk.GetEntryName().c_str()); + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk.ChangeDeflateChunkToNormal(); + continue; + } + + // If two deflate chunks are identical treat them as normal chunks. + if (tgt_chunk == src_chunk) { + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk.ChangeDeflateChunkToNormal(); + } } - patch_data->resize(sz); - if (!android::base::ReadFully(patch_fd, patch_data->data(), sz)) { - printf("failed to read \"%s\" %s\n", ptemp, strerror(errno)); + + // For images, we need to maintain the parallel structure of the chunk lists, so do the merging + // in both the source and target lists. 
+ tgt_image->MergeAdjacentNormalChunks(); + src_image->MergeAdjacentNormalChunks(); + if (tgt_image->NumOfChunks() != src_image->NumOfChunks()) { + // This shouldn't happen. + printf("merging normal chunks went awry\n"); return false; } - unlink(ptemp); - tgt->SetSourceInfo(*src); - return true; } -/* - * Look for runs of adjacent normal chunks and compress them down into - * a single chunk. (Such runs can be produced when deflate chunks are - * changed to normal chunks.) - */ -static void MergeAdjacentNormalChunks(std::vector* chunks) { - size_t merged_last = 0, cur = 0; - while (cur < chunks->size()) { - // Look for normal chunks adjacent to the current one. If such chunk exists, extend the - // length of the current normal chunk. - size_t to_check = cur + 1; - while (to_check < chunks->size() && chunks->at(cur).IsAdjacentNormal(chunks->at(to_check))) { - chunks->at(cur).MergeAdjacentNormal(chunks->at(to_check)); - to_check++; +// In image mode, generate patches against the given source chunks and bonus_data; write the +// result to |patch_name|. 
+bool ImageModeImage::GeneratePatches(ImageModeImage* tgt_image, ImageModeImage* src_image, + const std::vector& bonus_data, + const std::string& patch_name) { + printf("Construct patches for %zu chunks...\n", tgt_image->NumOfChunks()); + std::vector> patch_data(tgt_image->NumOfChunks()); + + for (size_t i = 0; i < tgt_image->NumOfChunks(); i++) { + auto& tgt_chunk = *tgt_image->Get(i); + auto& src_chunk = *src_image->Get(i); + + if (i == 1 && !bonus_data.empty()) { + printf(" using %zu bytes of bonus data for chunk %zu\n", bonus_data.size(), i); + src_chunk.SetBonusData(bonus_data); } - if (merged_last != cur) { - chunks->at(merged_last) = std::move(chunks->at(cur)); + if (!tgt_chunk.MakePatch(src_chunk, &patch_data[i], nullptr)) { + printf("Failed to generate patch for target chunk %zu: ", i); + return false; } - merged_last++; - cur = to_check; - } - if (merged_last < chunks->size()) { - chunks->erase(chunks->begin() + merged_last, chunks->end()); + printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data[i].size(), + tgt_chunk.GetRawDataLength()); } -} -static ImageChunk* FindChunkByName(const std::string& name, std::vector& chunks) { - for (size_t i = 0; i < chunks.size(); ++i) { - if (chunks[i].GetType() == CHUNK_DEFLATE && chunks[i].GetEntryName() == name) { - return &chunks[i]; - } + android::base::unique_fd patch_fd( + open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + if (patch_fd == -1) { + printf("failed to open \"%s\": %s\n", patch_name.c_str(), strerror(errno)); + return false; } - return nullptr; -} -static void DumpChunks(const std::vector& chunks) { - for (size_t i = 0; i < chunks.size(); ++i) { - printf("chunk %zu: ", i); - chunks[i].Dump(); - } + return tgt_image->WritePatchDataToFd(patch_data, patch_fd); } int imgdiff(int argc, const char** argv) { bool zip_mode = false; + std::vector bonus_data; - if (argc >= 2 && strcmp(argv[1], "-z") == 0) { - zip_mode = true; - --argc; - ++argv; - } + int opt; + optind = 1; // 
Reset the getopt state so that we can call it multiple times for test. - std::vector bonus_data; - if (argc >= 3 && strcmp(argv[1], "-b") == 0) { - android::base::unique_fd fd(open(argv[2], O_RDONLY)); - if (fd == -1) { - printf("failed to open bonus file %s: %s\n", argv[2], strerror(errno)); - return 1; - } - struct stat st; - if (fstat(fd, &st) != 0) { - printf("failed to stat bonus file %s: %s\n", argv[2], strerror(errno)); - return 1; - } + while ((opt = getopt(argc, const_cast(argv), "zb:")) != -1) { + switch (opt) { + case 'z': + zip_mode = true; + break; + case 'b': { + android::base::unique_fd fd(open(optarg, O_RDONLY)); + if (fd == -1) { + printf("failed to open bonus file %s: %s\n", optarg, strerror(errno)); + return 1; + } + struct stat st; + if (fstat(fd, &st) != 0) { + printf("failed to stat bonus file %s: %s\n", optarg, strerror(errno)); + return 1; + } - size_t bonus_size = st.st_size; - bonus_data.resize(bonus_size); - if (!android::base::ReadFully(fd, bonus_data.data(), bonus_size)) { - printf("failed to read bonus file %s: %s\n", argv[2], strerror(errno)); - return 1; + size_t bonus_size = st.st_size; + bonus_data.resize(bonus_size); + if (!android::base::ReadFully(fd, bonus_data.data(), bonus_size)) { + printf("failed to read bonus file %s: %s\n", optarg, strerror(errno)); + return 1; + } + break; + } + default: + printf("unexpected opt: %s\n", optarg); + return 2; } - - argc -= 2; - argv += 2; } - if (argc != 4) { - printf("usage: %s [-z] [-b ] \n", - argv[0]); + if (argc - optind != 3) { + printf("usage: %s [-z] [-b ] \n", argv[0]); return 2; } - std::vector src_chunks; - std::vector tgt_chunks; - std::vector src_file; - std::vector tgt_file; - if (zip_mode) { - if (!ReadZip(argv[1], &src_chunks, &src_file, true)) { - printf("failed to break apart source zip file\n"); + ZipModeImage src_image(true); + ZipModeImage tgt_image(false); + + if (!src_image.Initialize(argv[optind])) { return 1; } - if (!ReadZip(argv[2], &tgt_chunks, &tgt_file, false)) 
{ - printf("failed to break apart target zip file\n"); + if (!tgt_image.Initialize(argv[optind + 1])) { return 1; } - } else { - if (!ReadImage(argv[1], &src_chunks, &src_file)) { - printf("failed to break apart source image\n"); + + if (!ZipModeImage::CheckAndProcessChunks(&tgt_image, &src_image)) { return 1; } - if (!ReadImage(argv[2], &tgt_chunks, &tgt_file)) { - printf("failed to break apart target image\n"); + // Compute bsdiff patches for each chunk's data (the uncompressed data, in the case of + // deflate chunks). + if (!ZipModeImage::GeneratePatches(&tgt_image, &src_image, argv[optind + 2])) { return 1; } + } else { + ImageModeImage src_image(true); + ImageModeImage tgt_image(false); - // Verify that the source and target images have the same chunk - // structure (ie, the same sequence of deflate and normal chunks). - - // Merge the gzip header and footer in with any adjacent normal chunks. - MergeAdjacentNormalChunks(&tgt_chunks); - MergeAdjacentNormalChunks(&src_chunks); - - if (src_chunks.size() != tgt_chunks.size()) { - printf("source and target don't have same number of chunks!\n"); - printf("source chunks:\n"); - DumpChunks(src_chunks); - printf("target chunks:\n"); - DumpChunks(tgt_chunks); + if (!src_image.Initialize(argv[optind])) { return 1; } - for (size_t i = 0; i < src_chunks.size(); ++i) { - if (src_chunks[i].GetType() != tgt_chunks[i].GetType()) { - printf("source and target don't have same chunk structure! (chunk %zu)\n", i); - printf("source chunks:\n"); - DumpChunks(src_chunks); - printf("target chunks:\n"); - DumpChunks(tgt_chunks); - return 1; - } - } - } - - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - if (tgt_chunks[i].GetType() == CHUNK_DEFLATE) { - // Confirm that given the uncompressed chunk data in the target, we - // can recompress it and get exactly the same bits as are in the - // input target image. If this fails, treat the chunk as a normal - // non-deflated chunk. 
- if (!tgt_chunks[i].ReconstructDeflateChunk()) { - printf("failed to reconstruct target deflate chunk %zu [%s]; treating as normal\n", i, - tgt_chunks[i].GetEntryName().c_str()); - tgt_chunks[i].ChangeDeflateChunkToNormal(); - if (zip_mode) { - ImageChunk* src = FindChunkByName(tgt_chunks[i].GetEntryName(), src_chunks); - if (src != nullptr) { - src->ChangeDeflateChunkToNormal(); - } - } else { - src_chunks[i].ChangeDeflateChunkToNormal(); - } - continue; - } - - // If two deflate chunks are identical (eg, the kernel has not - // changed between two builds), treat them as normal chunks. - // This makes applypatch much faster -- it can apply a trivial - // patch to the compressed data, rather than uncompressing and - // recompressing to apply the trivial patch to the uncompressed - // data. - ImageChunk* src; - if (zip_mode) { - src = FindChunkByName(tgt_chunks[i].GetEntryName(), src_chunks); - } else { - src = &src_chunks[i]; - } - - if (src == nullptr) { - tgt_chunks[i].ChangeDeflateChunkToNormal(); - } else if (tgt_chunks[i] == *src) { - tgt_chunks[i].ChangeDeflateChunkToNormal(); - src->ChangeDeflateChunkToNormal(); - } - } - } - - // Merging neighboring normal chunks. - if (zip_mode) { - // For zips, we only need to do this to the target: deflated - // chunks are matched via filename, and normal chunks are patched - // using the entire source file as the source. - MergeAdjacentNormalChunks(&tgt_chunks); - - } else { - // For images, we need to maintain the parallel structure of the - // chunk lists, so do the merging in both the source and target - // lists. - MergeAdjacentNormalChunks(&tgt_chunks); - MergeAdjacentNormalChunks(&src_chunks); - if (src_chunks.size() != tgt_chunks.size()) { - // This shouldn't happen. - printf("merging normal chunks went awry\n"); + if (!tgt_image.Initialize(argv[optind + 1])) { return 1; } - } - - // Compute bsdiff patches for each chunk's data (the uncompressed - // data, in the case of deflate chunks). 
- - DumpChunks(src_chunks); - printf("Construct patches for %zu chunks...\n", tgt_chunks.size()); - std::vector> patch_data(tgt_chunks.size()); - saidx_t* bsdiff_cache = nullptr; - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - if (zip_mode) { - ImageChunk* src; - if (tgt_chunks[i].GetType() == CHUNK_DEFLATE && - (src = FindChunkByName(tgt_chunks[i].GetEntryName(), src_chunks))) { - if (!MakePatch(src, &tgt_chunks[i], &patch_data[i], nullptr)) { - printf("Failed to generate patch for target chunk %zu: ", i); - return 1; - } - } else { - if (!MakePatch(&src_chunks[0], &tgt_chunks[i], &patch_data[i], &bsdiff_cache)) { - printf("Failed to generate patch for target chunk %zu: ", i); - return 1; - } - } - } else { - if (i == 1 && !bonus_data.empty()) { - printf(" using %zu bytes of bonus data for chunk %zu\n", bonus_data.size(), i); - src_chunks[i].SetBonusData(bonus_data); - } - - if (!MakePatch(&src_chunks[i], &tgt_chunks[i], &patch_data[i], nullptr)) { - printf("Failed to generate patch for target chunk %zu: ", i); - return 1; - } + if (!ImageModeImage::CheckAndProcessChunks(&tgt_image, &src_image)) { + return 1; } - printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data[i].size(), - src_chunks[i].GetRawDataLength()); - } - - if (bsdiff_cache != nullptr) { - free(bsdiff_cache); - } - - // Figure out how big the imgdiff file header is going to be, so - // that we can correctly compute the offset of each bsdiff patch - // within the file. - - size_t total_header_size = 12; - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - total_header_size += tgt_chunks[i].GetHeaderSize(patch_data[i].size()); - } - - size_t offset = total_header_size; - - android::base::unique_fd patch_fd(open(argv[3], O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); - if (patch_fd == -1) { - printf("failed to open \"%s\": %s\n", argv[3], strerror(errno)); - return 1; - } - - // Write out the headers. 
- if (!android::base::WriteStringToFd("IMGDIFF2", patch_fd)) { - printf("failed to write \"IMGDIFF2\" to \"%s\": %s\n", argv[3], strerror(errno)); - return 1; - } - Write4(patch_fd, static_cast(tgt_chunks.size())); - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - printf("chunk %zu: ", i); - offset = tgt_chunks[i].WriteHeaderToFd(patch_fd, patch_data[i], offset); - } - - // Append each chunk's bsdiff patch, in order. - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - if (tgt_chunks[i].GetType() != CHUNK_RAW) { - if (!android::base::WriteFully(patch_fd, patch_data[i].data(), patch_data[i].size())) { - CHECK(false) << "failed to write " << patch_data[i].size() << " bytes patch for chunk " - << i; - } + if (!ImageModeImage::GeneratePatches(&tgt_image, &src_image, bonus_data, argv[optind + 2])) { + return 1; } } -- cgit v1.2.3 From d82a2ed50bab5ea014bcf3b1e5541e21829b4ecb Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Tue, 8 Aug 2017 17:35:01 -0700 Subject: Add a new PatchChunk class in imgdiff This way we can keep the input images const when calling GeneratePatches(). Test: recovery component test; diff and patch on chrome.apk; generate recovery-from-boot.p for angler. 
Change-Id: I65b5689b88f6719c6ede46bb82def0c4caeb8a61 --- applypatch/imgdiff.cpp | 528 +++++++++++++++++++++++++++---------------------- 1 file changed, 292 insertions(+), 236 deletions(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index 880265260..a81e385a3 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -168,15 +168,14 @@ class ImageChunk { static constexpr auto METHOD = Z_DEFLATED; static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY; - ImageChunk(int type, size_t start, const std::vector* file_content, size_t raw_data_len) + ImageChunk(int type, size_t start, const std::vector* file_content, size_t raw_data_len, + std::string entry_name = {}) : type_(type), start_(start), input_file_ptr_(file_content), raw_data_len_(raw_data_len), compress_level_(6), - source_start_(0), - source_len_(0), - source_uncompressed_len_(0) { + entry_name_(std::move(entry_name)) { CHECK(file_content != nullptr) << "input file container can't be nullptr"; } @@ -189,6 +188,12 @@ class ImageChunk { const std::string& GetEntryName() const { return entry_name_; } + size_t GetStartOffset() const { + return start_; + } + int GetCompressLevel() const { + return compress_level_; + } // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return // the raw data. @@ -200,8 +205,6 @@ class ImageChunk { entry_name_.c_str()); } - void SetSourceInfo(const ImageChunk& other); - void SetEntryName(std::string entryname); void SetUncompressedData(std::vector data); bool SetBonusData(const std::vector& bonus_data); @@ -210,57 +213,46 @@ class ImageChunk { return !(*this == other); } - size_t GetHeaderSize(size_t patch_size) const; - // Return the offset of the next patch into the patch data. - size_t WriteHeaderToFd(int fd, const std::vector& patch, size_t offset) const; - /* - * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob - * of uninterpreted data). 
The resulting patch will likely be about - * as big as the target file, but it lets us handle the case of images - * where some gzip chunks are reconstructible but others aren't (by - * treating the ones that aren't as normal chunks). + * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data). + * The resulting patch will likely be about as big as the target file, but it lets us handle + * the case of images where some gzip chunks are reconstructible but others aren't (by treating + * the ones that aren't as normal chunks). */ void ChangeDeflateChunkToNormal(); - bool ChangeChunkToRaw(size_t patch_size); /* - * Verify that we can reproduce exactly the same compressed data that - * we started with. Sets the level, method, windowBits, memLevel, and - * strategy fields in the chunk to the encoding parameters needed to - * produce the right output. + * Verify that we can reproduce exactly the same compressed data that we started with. Sets the + * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding + * parameters needed to produce the right output. */ bool ReconstructDeflateChunk(); bool IsAdjacentNormal(const ImageChunk& other) const; void MergeAdjacentNormal(const ImageChunk& other); /* - * Compute a bsdiff patch between |this| and the input source chunks. - * Store the result in the patch_data. + * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data. * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used * repeatedly, pass nullptr if not needed. 
*/ - bool MakePatch(const ImageChunk& src, std::vector* patch_data, saidx_t** bsdiff_cache); + static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src, + std::vector* patch_data, saidx_t** bsdiff_cache); private: + const uint8_t* GetRawData() const; + bool TryReconstruction(int level); + int type_; // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW size_t start_; // offset of chunk in the original input file const std::vector* input_file_ptr_; // ptr to the full content of original input file size_t raw_data_len_; - // --- for CHUNK_DEFLATE chunks only: --- - std::vector uncompressed_data_; - std::string entry_name_; // used for zip entries - // deflate encoder parameters int compress_level_; - size_t source_start_; - size_t source_len_; - size_t source_uncompressed_len_; - - const uint8_t* GetRawData() const; - bool TryReconstruction(int level); + // --- for CHUNK_DEFLATE chunks only: --- + std::vector uncompressed_data_; + std::string entry_name_; // used for zip entries }; const uint8_t* ImageChunk::GetRawData() const { @@ -290,20 +282,6 @@ bool ImageChunk::operator==(const ImageChunk& other) const { memcmp(GetRawData(), other.GetRawData(), raw_data_len_) == 0); } -void ImageChunk::SetSourceInfo(const ImageChunk& src) { - source_start_ = src.start_; - if (type_ == CHUNK_NORMAL) { - source_len_ = src.raw_data_len_; - } else if (type_ == CHUNK_DEFLATE) { - source_len_ = src.raw_data_len_; - source_uncompressed_len_ = src.uncompressed_data_.size(); - } -} - -void ImageChunk::SetEntryName(std::string entryname) { - entry_name_ = std::move(entryname); -} - void ImageChunk::SetUncompressedData(std::vector data) { uncompressed_data_ = std::move(data); } @@ -316,18 +294,6 @@ bool ImageChunk::SetBonusData(const std::vector& bonus_data) { return true; } -// Convert CHUNK_NORMAL & CHUNK_DEFLATE to CHUNK_RAW if the target size is -// smaller. Also take the header size into account during size comparison. 
-bool ImageChunk::ChangeChunkToRaw(size_t patch_size) { - if (type_ == CHUNK_RAW) { - return true; - } else if (type_ == CHUNK_NORMAL && (raw_data_len_ <= 160 || raw_data_len_ < patch_size)) { - type_ = CHUNK_RAW; - return true; - } - return false; -} - void ImageChunk::ChangeDeflateChunkToNormal() { if (type_ != CHUNK_DEFLATE) return; type_ = CHUNK_NORMAL; @@ -335,61 +301,6 @@ void ImageChunk::ChangeDeflateChunkToNormal() { uncompressed_data_.clear(); } -// Header size: -// header_type 4 bytes -// CHUNK_NORMAL 8*3 = 24 bytes -// CHUNK_DEFLATE 8*5 + 4*5 = 60 bytes -// CHUNK_RAW 4 bytes + patch_size -size_t ImageChunk::GetHeaderSize(size_t patch_size) const { - switch (type_) { - case CHUNK_NORMAL: - return 4 + 8 * 3; - case CHUNK_DEFLATE: - return 4 + 8 * 5 + 4 * 5; - case CHUNK_RAW: - return 4 + 4 + patch_size; - default: - CHECK(false) << "unexpected chunk type: " << type_; // Should not reach here. - return 0; - } -} - -size_t ImageChunk::WriteHeaderToFd(int fd, const std::vector& patch, size_t offset) const { - Write4(fd, type_); - switch (type_) { - case CHUNK_NORMAL: - printf("normal (%10zu, %10zu) %10zu\n", start_, raw_data_len_, patch.size()); - Write8(fd, static_cast(source_start_)); - Write8(fd, static_cast(source_len_)); - Write8(fd, static_cast(offset)); - return offset + patch.size(); - case CHUNK_DEFLATE: - printf("deflate (%10zu, %10zu) %10zu %s\n", start_, raw_data_len_, patch.size(), - entry_name_.c_str()); - Write8(fd, static_cast(source_start_)); - Write8(fd, static_cast(source_len_)); - Write8(fd, static_cast(offset)); - Write8(fd, static_cast(source_uncompressed_len_)); - Write8(fd, static_cast(uncompressed_data_.size())); - Write4(fd, compress_level_); - Write4(fd, METHOD); - Write4(fd, WINDOWBITS); - Write4(fd, MEMLEVEL); - Write4(fd, STRATEGY); - return offset + patch.size(); - case CHUNK_RAW: - printf("raw (%10zu, %10zu)\n", start_, raw_data_len_); - Write4(fd, static_cast(patch.size())); - if (!android::base::WriteFully(fd, patch.data(), 
patch.size())) { - CHECK(false) << "failed to write " << patch.size() <<" bytes patch"; - } - return offset; - default: - CHECK(false) << "unexpected chunk type: " << type_; - return offset; - } -} - bool ImageChunk::IsAdjacentNormal(const ImageChunk& other) const { if (type_ != CHUNK_NORMAL || other.type_ != CHUNK_NORMAL) { return false; @@ -402,15 +313,8 @@ void ImageChunk::MergeAdjacentNormal(const ImageChunk& other) { raw_data_len_ = raw_data_len_ + other.raw_data_len_; } -bool ImageChunk::MakePatch(const ImageChunk& src, std::vector* patch_data, - saidx_t** bsdiff_cache) { - if (ChangeChunkToRaw(0)) { - size_t patch_size = DataLengthForPatch(); - patch_data->resize(patch_size); - std::copy(DataForPatch(), DataForPatch() + patch_size, patch_data->begin()); - return true; - } - +bool ImageChunk::MakePatch(const ImageChunk& tgt, const ImageChunk& src, + std::vector* patch_data, saidx_t** bsdiff_cache) { #if defined(__ANDROID__) char ptemp[] = "/data/local/tmp/imgdiff-patch-XXXXXX"; #else @@ -424,8 +328,8 @@ bool ImageChunk::MakePatch(const ImageChunk& src, std::vector* patch_da } close(fd); - int r = bsdiff::bsdiff(src.DataForPatch(), src.DataLengthForPatch(), DataForPatch(), - DataLengthForPatch(), ptemp, bsdiff_cache); + int r = bsdiff::bsdiff(src.DataForPatch(), src.DataLengthForPatch(), tgt.DataForPatch(), + tgt.DataLengthForPatch(), ptemp, bsdiff_cache); if (r != 0) { printf("bsdiff() failed: %d\n", r); return false; @@ -443,14 +347,7 @@ bool ImageChunk::MakePatch(const ImageChunk& src, std::vector* patch_da } size_t sz = static_cast(st.st_size); - // Change the chunk type to raw if the patch takes less space that way. 
- if (ChangeChunkToRaw(sz)) { - unlink(ptemp); - size_t patch_size = DataLengthForPatch(); - patch_data->resize(patch_size); - std::copy(DataForPatch(), DataForPatch() + patch_size, patch_data->begin()); - return true; - } + patch_data->resize(sz); if (!android::base::ReadFully(patch_fd, patch_data->data(), sz)) { printf("failed to read \"%s\" %s\n", ptemp, strerror(errno)); @@ -459,7 +356,6 @@ bool ImageChunk::MakePatch(const ImageChunk& src, std::vector* patch_da } unlink(ptemp); - SetSourceInfo(src); return true; } @@ -470,8 +366,8 @@ bool ImageChunk::ReconstructDeflateChunk() { return false; } - // We only check two combinations of encoder parameters: level 6 - // (the default) and level 9 (the maximum). + // We only check two combinations of encoder parameters: level 6 (the default) and level 9 + // (the maximum). for (int level = 6; level <= 9; level += 3) { if (TryReconstruction(level)) { compress_level_ = level; @@ -483,10 +379,9 @@ bool ImageChunk::ReconstructDeflateChunk() { } /* - * Takes the uncompressed data stored in the chunk, compresses it - * using the zlib parameters stored in the chunk, and checks that it - * matches exactly the compressed data we started with (also stored in - * the chunk). + * Takes the uncompressed data stored in the chunk, compresses it using the zlib parameters stored + * in the chunk, and checks that it matches exactly the compressed data we started with (also + * stored in the chunk). */ bool ImageChunk::TryReconstruction(int level) { z_stream strm; @@ -529,6 +424,156 @@ bool ImageChunk::TryReconstruction(int level) { return true; } +// PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track +// of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length). 
+class PatchChunk { + public: + PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector data) + : type_(tgt.GetType()), + source_start_(src.GetStartOffset()), + source_len_(src.GetRawDataLength()), + source_uncompressed_len_(src.DataLengthForPatch()), + target_start_(tgt.GetStartOffset()), + target_len_(tgt.GetRawDataLength()), + target_uncompressed_len_(tgt.DataLengthForPatch()), + target_compress_level_(tgt.GetCompressLevel()), + data_(std::move(data)) {} + + // Construct a CHUNK_RAW patch from the target data directly. + explicit PatchChunk(const ImageChunk& tgt) + : type_(CHUNK_RAW), + source_start_(0), + source_len_(0), + source_uncompressed_len_(0), + target_start_(tgt.GetStartOffset()), + target_len_(tgt.GetRawDataLength()), + target_uncompressed_len_(tgt.DataLengthForPatch()), + target_compress_level_(tgt.GetCompressLevel()), + data_(tgt.DataForPatch(), tgt.DataForPatch() + tgt.DataLengthForPatch()) {} + + // Return true if raw data size is smaller than the patch size. + static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size); + + static bool WritePatchDataToFd(const std::vector& patch_chunks, int patch_fd); + + private: + size_t GetHeaderSize() const; + size_t WriteHeaderToFd(int fd, size_t offset) const; + + // The patch chunk type is the same as the target chunk type. The only exception is we change + // the |type_| to CHUNK_RAW if target length is smaller than the patch size. + int type_; + + size_t source_start_; + size_t source_len_; + size_t source_uncompressed_len_; + + size_t target_start_; // offset of the target chunk within the target file + size_t target_len_; + size_t target_uncompressed_len_; + size_t target_compress_level_; // the deflate compression level of the target chunk. + + std::vector data_; // storage for the patch data +}; + +// Return true if raw data is smaller than the patch size. 
+bool PatchChunk::RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size) { + size_t target_len = tgt.GetRawDataLength(); + return (tgt.GetType() == CHUNK_NORMAL && (target_len <= 160 || target_len < patch_size)); +} + +// Header size: +// header_type 4 bytes +// CHUNK_NORMAL 8*3 = 24 bytes +// CHUNK_DEFLATE 8*5 + 4*5 = 60 bytes +// CHUNK_RAW 4 bytes + patch_size +size_t PatchChunk::GetHeaderSize() const { + switch (type_) { + case CHUNK_NORMAL: + return 4 + 8 * 3; + case CHUNK_DEFLATE: + return 4 + 8 * 5 + 4 * 5; + case CHUNK_RAW: + return 4 + 4 + data_.size(); + default: + CHECK(false) << "unexpected chunk type: " << type_; // Should not reach here. + return 0; + } +} + +// Return the offset of the next patch into the patch data. +size_t PatchChunk::WriteHeaderToFd(int fd, size_t offset) const { + Write4(fd, type_); + switch (type_) { + case CHUNK_NORMAL: + printf("normal (%10zu, %10zu) %10zu\n", target_start_, target_len_, data_.size()); + Write8(fd, static_cast(source_start_)); + Write8(fd, static_cast(source_len_)); + Write8(fd, static_cast(offset)); + return offset + data_.size(); + case CHUNK_DEFLATE: + printf("deflate (%10zu, %10zu) %10zu\n", target_start_, target_len_, data_.size()); + Write8(fd, static_cast(source_start_)); + Write8(fd, static_cast(source_len_)); + Write8(fd, static_cast(offset)); + Write8(fd, static_cast(source_uncompressed_len_)); + Write8(fd, static_cast(target_uncompressed_len_)); + Write4(fd, target_compress_level_); + Write4(fd, ImageChunk::METHOD); + Write4(fd, ImageChunk::WINDOWBITS); + Write4(fd, ImageChunk::MEMLEVEL); + Write4(fd, ImageChunk::STRATEGY); + return offset + data_.size(); + case CHUNK_RAW: + printf("raw (%10zu, %10zu)\n", target_start_, target_len_); + Write4(fd, static_cast(data_.size())); + if (!android::base::WriteFully(fd, data_.data(), data_.size())) { + CHECK(false) << "failed to write " << data_.size() << " bytes patch"; + } + return offset; + default: + CHECK(false) << "unexpected chunk type: " << type_; 
+ return offset; + } +} + +// Write the contents of |patch_chunks| to |patch_fd|. +bool PatchChunk::WritePatchDataToFd(const std::vector& patch_chunks, int patch_fd) { + // Figure out how big the imgdiff file header is going to be, so that we can correctly compute + // the offset of each bsdiff patch within the file. + size_t total_header_size = 12; + for (const auto& patch : patch_chunks) { + total_header_size += patch.GetHeaderSize(); + } + + size_t offset = total_header_size; + + // Write out the headers. + if (!android::base::WriteStringToFd("IMGDIFF2", patch_fd)) { + printf("failed to write \"IMGDIFF2\": %s\n", strerror(errno)); + return false; + } + + Write4(patch_fd, static_cast(patch_chunks.size())); + for (size_t i = 0; i < patch_chunks.size(); ++i) { + printf("chunk %zu: ", i); + offset = patch_chunks[i].WriteHeaderToFd(patch_fd, offset); + } + + // Append each chunk's bsdiff patch, in order. + for (const auto& patch : patch_chunks) { + if (patch.type_ == CHUNK_RAW) { + continue; + } + if (!android::base::WriteFully(patch_fd, patch.data_.data(), patch.data_.size())) { + printf("failed to write %zu bytes patch to patch_fd\n", patch.data_.size()); + return false; + } + } + + return true; +} + // Interface for zip_mode and image_mode images. We initialize the image from an input file and // split the file content into a list of image chunks. class Image { @@ -548,8 +593,7 @@ class Image { // also if |find_normal| is true. ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false); - // Write the contents of |patch_data| to |patch_fd|. - bool WritePatchDataToFd(const std::vector>& patch_data, int patch_fd) const; + const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const; void DumpChunks() const; @@ -561,10 +605,15 @@ class Image { std::vector::iterator end() { return chunks_.end(); } - // Return a pointer to the ith ImageChunk. 
- ImageChunk* Get(size_t i) { + + ImageChunk& operator[](size_t i) { CHECK_LT(i, chunks_.size()); - return &chunks_[i]; + return chunks_[i]; + } + + const ImageChunk& operator[](size_t i) const { + CHECK_LT(i, chunks_.size()); + return chunks_[i]; } size_t NumOfChunks() const { @@ -601,7 +650,7 @@ void Image::MergeAdjacentNormalChunks() { } } -ImageChunk* Image::FindChunkByName(const std::string& name, bool find_normal) { +const ImageChunk* Image::FindChunkByName(const std::string& name, bool find_normal) const { if (name.empty()) { return nullptr; } @@ -613,40 +662,9 @@ ImageChunk* Image::FindChunkByName(const std::string& name, bool find_normal) { return nullptr; } -bool Image::WritePatchDataToFd(const std::vector>& patch_data, - int patch_fd) const { - // Figure out how big the imgdiff file header is going to be, so that we can correctly compute - // the offset of each bsdiff patch within the file. - CHECK_EQ(chunks_.size(), patch_data.size()); - size_t total_header_size = 12; - for (size_t i = 0; i < chunks_.size(); ++i) { - total_header_size += chunks_[i].GetHeaderSize(patch_data[i].size()); - } - - size_t offset = total_header_size; - - // Write out the headers. - if (!android::base::WriteStringToFd("IMGDIFF2", patch_fd)) { - printf("failed to write \"IMGDIFF2\": %s\n", strerror(errno)); - return false; - } - Write4(patch_fd, static_cast(chunks_.size())); - for (size_t i = 0; i < chunks_.size(); ++i) { - printf("chunk %zu: ", i); - offset = chunks_[i].WriteHeaderToFd(patch_fd, patch_data[i], offset); - } - - // Append each chunk's bsdiff patch, in order. 
- for (size_t i = 0; i < chunks_.size(); ++i) { - if (chunks_[i].GetType() != CHUNK_RAW) { - if (!android::base::WriteFully(patch_fd, patch_data[i].data(), patch_data[i].size())) { - printf("failed to write %zu bytes patch for chunk %zu\n", patch_data[i].size(), i); - return false; - } - } - } - - return true; +ImageChunk* Image::FindChunkByName(const std::string& name, bool find_normal) { + return const_cast( + static_cast(this)->FindChunkByName(name, find_normal)); } void Image::DumpChunks() const { @@ -699,8 +717,8 @@ class ZipModeImage : public Image { // src and tgt are identical. static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image); - // Compute the patches against the input image, and write the data into |patch_name|. - static bool GeneratePatches(ZipModeImage* tgt_image, ZipModeImage* src_image, + // Compute the patch between tgt & src images, and write the data into |patch_name|. + static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, const std::string& patch_name); private: @@ -834,14 +852,11 @@ bool ZipModeImage::AddZipEntryToChunks(ZipArchiveHandle handle, const std::strin ErrorCodeString(ret)); return false; } - ImageChunk curr(CHUNK_DEFLATE, entry->offset, &file_content_, compressed_len); - curr.SetEntryName(entry_name); + ImageChunk curr(CHUNK_DEFLATE, entry->offset, &file_content_, compressed_len, entry_name); curr.SetUncompressedData(std::move(uncompressed_data)); - chunks_.push_back(curr); + chunks_.push_back(std::move(curr)); } else { - ImageChunk curr(CHUNK_NORMAL, entry->offset, &file_content_, compressed_len); - curr.SetEntryName(entry_name); - chunks_.push_back(curr); + chunks_.emplace_back(CHUNK_NORMAL, entry->offset, &file_content_, compressed_len, entry_name); } return true; @@ -907,40 +922,55 @@ bool ZipModeImage::CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* } } - return true; -} - -bool ZipModeImage::GeneratePatches(ZipModeImage* tgt_image, ZipModeImage* 
src_image, - const std::string& patch_name) { // For zips, we only need merge normal chunks for the target: deflated chunks are matched via // filename, and normal chunks are patched using the entire source file as the source. tgt_image->MergeAdjacentNormalChunks(); tgt_image->DumpChunks(); - printf("Construct patches for %zu chunks...\n", tgt_image->NumOfChunks()); - std::vector> patch_data(tgt_image->NumOfChunks()); + return true; +} + +bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + const std::string& patch_name) { + printf("Construct patches for %zu chunks...\n", tgt_image.NumOfChunks()); + std::vector patch_chunks; + patch_chunks.reserve(tgt_image.NumOfChunks()); saidx_t* bsdiff_cache = nullptr; - size_t i = 0; - for (auto& tgt_chunk : *tgt_image) { - ImageChunk* src_chunk = (tgt_chunk.GetType() != CHUNK_DEFLATE) - ? nullptr - : src_image->FindChunkByName(tgt_chunk.GetEntryName()); + for (size_t i = 0; i < tgt_image.NumOfChunks(); i++) { + const auto& tgt_chunk = tgt_image[i]; + + if (PatchChunk::RawDataIsSmaller(tgt_chunk, 0)) { + patch_chunks.emplace_back(tgt_chunk); + continue; + } - const auto& src_ref = (src_chunk == nullptr) ? src_image->PseudoSource() : *src_chunk; + const ImageChunk* src_chunk = (tgt_chunk.GetType() != CHUNK_DEFLATE) + ? nullptr + : src_image.FindChunkByName(tgt_chunk.GetEntryName()); + + const auto& src_ref = (src_chunk == nullptr) ? src_image.PseudoSource() : *src_chunk; saidx_t** bsdiff_cache_ptr = (src_chunk == nullptr) ? 
&bsdiff_cache : nullptr; - if (!tgt_chunk.MakePatch(src_ref, &patch_data[i], bsdiff_cache_ptr)) { + std::vector patch_data; + if (!ImageChunk::MakePatch(tgt_chunk, src_ref, &patch_data, bsdiff_cache_ptr)) { printf("Failed to generate patch, name: %s\n", tgt_chunk.GetEntryName().c_str()); return false; } - printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data[i].size(), + printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data.size(), tgt_chunk.GetRawDataLength()); - i++; + + if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { + patch_chunks.emplace_back(tgt_chunk); + } else { + patch_chunks.emplace_back(tgt_chunk, src_ref, std::move(patch_data)); + } } free(bsdiff_cache); + CHECK_EQ(tgt_image.NumOfChunks(), patch_chunks.size()); + android::base::unique_fd patch_fd( open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); if (patch_fd == -1) { @@ -948,7 +978,7 @@ bool ZipModeImage::GeneratePatches(ZipModeImage* tgt_image, ZipModeImage* src_im return false; } - return tgt_image->WritePatchDataToFd(patch_data, patch_fd); + return PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd); } class ImageModeImage : public Image { @@ -958,14 +988,16 @@ class ImageModeImage : public Image { // Initialize the image chunks list by searching the magic numbers in an image file. bool Initialize(const std::string& filename) override; + bool SetBonusData(const std::vector& bonus_data); + // In Image Mode, verify that the source and target images have the same chunk structure (ie, the // same sequence of deflate and normal chunks). static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image); // In image mode, generate patches against the given source chunks and bonus_data; write the // result to |patch_name|. 
- static bool GeneratePatches(ImageModeImage* tgt_image, ImageModeImage* src_image, - const std::vector& bonus_data, const std::string& patch_name); + static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image, + const std::string& patch_name); }; bool ImageModeImage::Initialize(const std::string& filename) { @@ -1053,7 +1085,7 @@ bool ImageModeImage::Initialize(const std::string& filename) { ImageChunk body(CHUNK_DEFLATE, pos, &file_content_, raw_data_len); uncompressed_data.resize(uncompressed_len); body.SetUncompressedData(std::move(uncompressed_data)); - chunks_.push_back(body); + chunks_.push_back(std::move(body)); pos += raw_data_len; @@ -1083,6 +1115,18 @@ bool ImageModeImage::Initialize(const std::string& filename) { return true; } +bool ImageModeImage::SetBonusData(const std::vector& bonus_data) { + CHECK(is_source_); + if (chunks_.size() < 2 || !chunks_[1].SetBonusData(bonus_data)) { + printf("Failed to set bonus data\n"); + DumpChunks(); + return false; + } + + printf(" using %zu bytes of bonus data\n", bonus_data.size()); + return true; +} + // In Image Mode, verify that the source and target images have the same chunk structure (ie, the // same sequence of deflate and normal chunks). bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image) { @@ -1097,7 +1141,7 @@ bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeI return false; } for (size_t i = 0; i < tgt_image->NumOfChunks(); ++i) { - if (tgt_image->Get(i)->GetType() != src_image->Get(i)->GetType()) { + if ((*tgt_image)[i].GetType() != (*src_image)[i].GetType()) { printf("source and target don't have same chunk structure! 
(chunk %zu)\n", i); tgt_image->DumpChunks(); src_image->DumpChunks(); @@ -1106,26 +1150,23 @@ bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeI } for (size_t i = 0; i < tgt_image->NumOfChunks(); ++i) { - auto& tgt_chunk = *tgt_image->Get(i); - auto& src_chunk = *src_image->Get(i); + auto& tgt_chunk = (*tgt_image)[i]; + auto& src_chunk = (*src_image)[i]; if (tgt_chunk.GetType() != CHUNK_DEFLATE) { continue; } - // Confirm that we can recompress the data and get exactly the same bits as are in the - // input target image. - if (!tgt_chunk.ReconstructDeflateChunk()) { - printf("failed to reconstruct target deflate chunk %zu [%s]; treating as normal\n", i, - tgt_chunk.GetEntryName().c_str()); - tgt_chunk.ChangeDeflateChunkToNormal(); - src_chunk.ChangeDeflateChunkToNormal(); - continue; - } - // If two deflate chunks are identical treat them as normal chunks. if (tgt_chunk == src_chunk) { tgt_chunk.ChangeDeflateChunkToNormal(); src_chunk.ChangeDeflateChunkToNormal(); + } else if (!tgt_chunk.ReconstructDeflateChunk()) { + // We cannot recompress the data and get exactly the same bits as are in the input target + // image, fall back to normal + printf("failed to reconstruct target deflate chunk %zu [%s]; treating as normal\n", i, + tgt_chunk.GetEntryName().c_str()); + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk.ChangeDeflateChunkToNormal(); } } @@ -1144,29 +1185,39 @@ bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeI // In image mode, generate patches against the given source chunks and bonus_data; write the // result to |patch_name|. 
-bool ImageModeImage::GeneratePatches(ImageModeImage* tgt_image, ImageModeImage* src_image, - const std::vector& bonus_data, +bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, + const ImageModeImage& src_image, const std::string& patch_name) { - printf("Construct patches for %zu chunks...\n", tgt_image->NumOfChunks()); - std::vector> patch_data(tgt_image->NumOfChunks()); + printf("Construct patches for %zu chunks...\n", tgt_image.NumOfChunks()); + std::vector patch_chunks; + patch_chunks.reserve(tgt_image.NumOfChunks()); - for (size_t i = 0; i < tgt_image->NumOfChunks(); i++) { - auto& tgt_chunk = *tgt_image->Get(i); - auto& src_chunk = *src_image->Get(i); + for (size_t i = 0; i < tgt_image.NumOfChunks(); i++) { + const auto& tgt_chunk = tgt_image[i]; + const auto& src_chunk = src_image[i]; - if (i == 1 && !bonus_data.empty()) { - printf(" using %zu bytes of bonus data for chunk %zu\n", bonus_data.size(), i); - src_chunk.SetBonusData(bonus_data); + if (PatchChunk::RawDataIsSmaller(tgt_chunk, 0)) { + patch_chunks.emplace_back(tgt_chunk); + continue; } - if (!tgt_chunk.MakePatch(src_chunk, &patch_data[i], nullptr)) { + std::vector patch_data; + if (!ImageChunk::MakePatch(tgt_chunk, src_chunk, &patch_data, nullptr)) { printf("Failed to generate patch for target chunk %zu: ", i); return false; } - printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data[i].size(), + printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data.size(), tgt_chunk.GetRawDataLength()); + + if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { + patch_chunks.emplace_back(tgt_chunk); + } else { + patch_chunks.emplace_back(tgt_chunk, src_chunk, std::move(patch_data)); + } } + CHECK_EQ(tgt_image.NumOfChunks(), patch_chunks.size()); + android::base::unique_fd patch_fd( open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); if (patch_fd == -1) { @@ -1174,7 +1225,7 @@ bool ImageModeImage::GeneratePatches(ImageModeImage* tgt_image, 
ImageModeImage* return false; } - return tgt_image->WritePatchDataToFd(patch_data, patch_fd); + return PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd); } int imgdiff(int argc, const char** argv) { @@ -1236,7 +1287,7 @@ int imgdiff(int argc, const char** argv) { } // Compute bsdiff patches for each chunk's data (the uncompressed data, in the case of // deflate chunks). - if (!ZipModeImage::GeneratePatches(&tgt_image, &src_image, argv[optind + 2])) { + if (!ZipModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { return 1; } } else { @@ -1253,7 +1304,12 @@ int imgdiff(int argc, const char** argv) { if (!ImageModeImage::CheckAndProcessChunks(&tgt_image, &src_image)) { return 1; } - if (!ImageModeImage::GeneratePatches(&tgt_image, &src_image, bonus_data, argv[optind + 2])) { + + if (!bonus_data.empty() && !src_image.SetBonusData(bonus_data)) { + return 1; + } + + if (!ImageModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { return 1; } } -- cgit v1.2.3 From 57dd96199570beb29ea8b0f3934c594cd42e3043 Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Thu, 17 Aug 2017 17:50:56 -0700 Subject: Move Image/ImageChunk/PatchChunk declaration into header files 1. Move the declaration of the Image classes to the header file to make testing easier. 2. Also move rangeset.h to bootable/recovery to allow access in imgdiff. 
Test: recovery component test Change-Id: I68a863e60a3f2e7ae46ee48f48eb15391f5f4330 --- applypatch/imgdiff.cpp | 335 ++++++++++--------------------------------------- 1 file changed, 65 insertions(+), 270 deletions(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index a81e385a3..59b600713 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -140,11 +140,12 @@ #include #include #include -#include - #include +#include #include +#include "applypatch/imgdiff_image.h" + using android::base::get_unaligned; static constexpr auto BUFFER_SIZE = 0x8000; @@ -161,99 +162,16 @@ static inline bool Write4(int fd, int32_t value) { return android::base::WriteFully(fd, &value, sizeof(int32_t)); } -class ImageChunk { - public: - static constexpr auto WINDOWBITS = -15; // 32kb window; negative to indicate a raw stream. - static constexpr auto MEMLEVEL = 8; // the default value. - static constexpr auto METHOD = Z_DEFLATED; - static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY; - - ImageChunk(int type, size_t start, const std::vector* file_content, size_t raw_data_len, - std::string entry_name = {}) - : type_(type), - start_(start), - input_file_ptr_(file_content), - raw_data_len_(raw_data_len), - compress_level_(6), - entry_name_(std::move(entry_name)) { - CHECK(file_content != nullptr) << "input file container can't be nullptr"; - } - - int GetType() const { - return type_; - } - size_t GetRawDataLength() const { - return raw_data_len_; - } - const std::string& GetEntryName() const { - return entry_name_; - } - size_t GetStartOffset() const { - return start_; - } - int GetCompressLevel() const { - return compress_level_; - } - - // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return - // the raw data. 
- const uint8_t * DataForPatch() const; - size_t DataLengthForPatch() const; - - void Dump() const { - printf("type: %d, start: %zu, len: %zu, name: %s\n", type_, start_, DataLengthForPatch(), - entry_name_.c_str()); - } - - void SetUncompressedData(std::vector data); - bool SetBonusData(const std::vector& bonus_data); - - bool operator==(const ImageChunk& other) const; - bool operator!=(const ImageChunk& other) const { - return !(*this == other); - } - - /* - * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data). - * The resulting patch will likely be about as big as the target file, but it lets us handle - * the case of images where some gzip chunks are reconstructible but others aren't (by treating - * the ones that aren't as normal chunks). - */ - void ChangeDeflateChunkToNormal(); - - /* - * Verify that we can reproduce exactly the same compressed data that we started with. Sets the - * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding - * parameters needed to produce the right output. - */ - bool ReconstructDeflateChunk(); - bool IsAdjacentNormal(const ImageChunk& other) const; - void MergeAdjacentNormal(const ImageChunk& other); - - /* - * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data. - * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used - * repeatedly, pass nullptr if not needed. 
- */ - static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src, - std::vector* patch_data, saidx_t** bsdiff_cache); - - private: - const uint8_t* GetRawData() const; - bool TryReconstruction(int level); - - int type_; // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW - size_t start_; // offset of chunk in the original input file - const std::vector* input_file_ptr_; // ptr to the full content of original input file - size_t raw_data_len_; - - // deflate encoder parameters - int compress_level_; - - // --- for CHUNK_DEFLATE chunks only: --- - std::vector uncompressed_data_; - std::string entry_name_; // used for zip entries -}; +ImageChunk::ImageChunk(int type, size_t start, const std::vector* file_content, + size_t raw_data_len, std::string entry_name) + : type_(type), + start_(start), + input_file_ptr_(file_content), + raw_data_len_(raw_data_len), + compress_level_(6), + entry_name_(std::move(entry_name)) { + CHECK(file_content != nullptr) << "input file container can't be nullptr"; +} const uint8_t* ImageChunk::GetRawData() const { CHECK_LE(start_ + raw_data_len_, input_file_ptr_->size()); @@ -424,57 +342,28 @@ bool ImageChunk::TryReconstruction(int level) { return true; } -// PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track -// of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length). -class PatchChunk { - public: - PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector data) - : type_(tgt.GetType()), - source_start_(src.GetStartOffset()), - source_len_(src.GetRawDataLength()), - source_uncompressed_len_(src.DataLengthForPatch()), - target_start_(tgt.GetStartOffset()), - target_len_(tgt.GetRawDataLength()), - target_uncompressed_len_(tgt.DataLengthForPatch()), - target_compress_level_(tgt.GetCompressLevel()), - data_(std::move(data)) {} - - // Construct a CHUNK_RAW patch from the target data directly. 
- explicit PatchChunk(const ImageChunk& tgt) - : type_(CHUNK_RAW), - source_start_(0), - source_len_(0), - source_uncompressed_len_(0), - target_start_(tgt.GetStartOffset()), - target_len_(tgt.GetRawDataLength()), - target_uncompressed_len_(tgt.DataLengthForPatch()), - target_compress_level_(tgt.GetCompressLevel()), - data_(tgt.DataForPatch(), tgt.DataForPatch() + tgt.DataLengthForPatch()) {} - - // Return true if raw data size is smaller than the patch size. - static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size); - - static bool WritePatchDataToFd(const std::vector& patch_chunks, int patch_fd); - - private: - size_t GetHeaderSize() const; - size_t WriteHeaderToFd(int fd, size_t offset) const; - - // The patch chunk type is the same as the target chunk type. The only exception is we change - // the |type_| to CHUNK_RAW if target length is smaller than the patch size. - int type_; - - size_t source_start_; - size_t source_len_; - size_t source_uncompressed_len_; - - size_t target_start_; // offset of the target chunk within the target file - size_t target_len_; - size_t target_uncompressed_len_; - size_t target_compress_level_; // the deflate compression level of the target chunk. - - std::vector data_; // storage for the patch data -}; +PatchChunk::PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector data) + : type_(tgt.GetType()), + source_start_(src.GetStartOffset()), + source_len_(src.GetRawDataLength()), + source_uncompressed_len_(src.DataLengthForPatch()), + target_start_(tgt.GetStartOffset()), + target_len_(tgt.GetRawDataLength()), + target_uncompressed_len_(tgt.DataLengthForPatch()), + target_compress_level_(tgt.GetCompressLevel()), + data_(std::move(data)) {} + +// Construct a CHUNK_RAW patch from the target data directly. 
+PatchChunk::PatchChunk(const ImageChunk& tgt) + : type_(CHUNK_RAW), + source_start_(0), + source_len_(0), + source_uncompressed_len_(0), + target_start_(tgt.GetStartOffset()), + target_len_(tgt.GetRawDataLength()), + target_uncompressed_len_(tgt.DataLengthForPatch()), + target_compress_level_(tgt.GetCompressLevel()), + data_(tgt.DataForPatch(), tgt.DataForPatch() + tgt.DataLengthForPatch()) {} // Return true if raw data is smaller than the patch size. bool PatchChunk::RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size) { @@ -574,59 +463,15 @@ bool PatchChunk::WritePatchDataToFd(const std::vector& patch_chunks, return true; } -// Interface for zip_mode and image_mode images. We initialize the image from an input file and -// split the file content into a list of image chunks. -class Image { - public: - explicit Image(bool is_source) : is_source_(is_source) {} - - virtual ~Image() {} - - // Create a list of image chunks from input file. - virtual bool Initialize(const std::string& filename) = 0; - - // Look for runs of adjacent normal chunks and compress them down into a single chunk. (Such - // runs can be produced when deflate chunks are changed to normal chunks.) - void MergeAdjacentNormalChunks(); - - // In zip mode, find the matching deflate source chunk by entry name. Search for normal chunks - // also if |find_normal| is true. - ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false); - - const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const; - - void DumpChunks() const; - - // Non const iterators to access the stored ImageChunks. 
- std::vector::iterator begin() { - return chunks_.begin(); - } - - std::vector::iterator end() { - return chunks_.end(); - } - - ImageChunk& operator[](size_t i) { - CHECK_LT(i, chunks_.size()); - return chunks_[i]; - } - - const ImageChunk& operator[](size_t i) const { - CHECK_LT(i, chunks_.size()); - return chunks_[i]; - } - - size_t NumOfChunks() const { - return chunks_.size(); - } - - protected: - bool ReadFile(const std::string& filename, std::vector* file_content); +ImageChunk& Image::operator[](size_t i) { + CHECK_LT(i, chunks_.size()); + return chunks_[i]; +} - bool is_source_; // True if it's for source chunks. - std::vector chunks_; // Internal storage of ImageChunk. - std::vector file_content_; // Store the whole input file in memory. -}; +const ImageChunk& Image::operator[](size_t i) const { + CHECK_LT(i, chunks_.size()); + return chunks_[i]; +} void Image::MergeAdjacentNormalChunks() { size_t merged_last = 0, cur = 0; @@ -650,23 +495,6 @@ void Image::MergeAdjacentNormalChunks() { } } -const ImageChunk* Image::FindChunkByName(const std::string& name, bool find_normal) const { - if (name.empty()) { - return nullptr; - } - for (auto& chunk : chunks_) { - if ((chunk.GetType() == CHUNK_DEFLATE || find_normal) && chunk.GetEntryName() == name) { - return &chunk; - } - } - return nullptr; -} - -ImageChunk* Image::FindChunkByName(const std::string& name, bool find_normal) { - return const_cast( - static_cast(this)->FindChunkByName(name, find_normal)); -} - void Image::DumpChunks() const { std::string type = is_source_ ? 
"source" : "target"; printf("Dumping chunks for %s\n", type.c_str()); @@ -701,39 +529,6 @@ bool Image::ReadFile(const std::string& filename, std::vector* file_con return true; } -class ZipModeImage : public Image { - public: - explicit ZipModeImage(bool is_source) : Image(is_source) {} - - bool Initialize(const std::string& filename) override; - - const ImageChunk& PseudoSource() const { - CHECK(is_source_); - CHECK(pseudo_source_ != nullptr); - return *pseudo_source_; - } - - // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if - // src and tgt are identical. - static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image); - - // Compute the patch between tgt & src images, and write the data into |patch_name|. - static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, - const std::string& patch_name); - - private: - // Initialize image chunks based on the zip entries. - bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle); - // Add the a zip entry to the list. - bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry); - // Return the real size of the zip file. (omit the trailing zeros that used for alignment) - bool GetZipFileSize(size_t* input_file_size); - - // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in - // fact the whole source file. 
- std::unique_ptr pseudo_source_; -}; - bool ZipModeImage::Initialize(const std::string& filename) { if (!ReadFile(filename, &file_content_)) { return false; @@ -754,9 +549,6 @@ bool ZipModeImage::Initialize(const std::string& filename) { return false; } - if (is_source_) { - pseudo_source_ = std::make_unique(CHUNK_NORMAL, 0, &file_content_, zipfile_size); - } if (!InitializeChunks(filename, handle)) { CloseArchive(handle); return false; @@ -895,6 +687,28 @@ bool ZipModeImage::GetZipFileSize(size_t* input_file_size) { return false; } +ImageChunk ZipModeImage::PseudoSource() const { + CHECK(is_source_); + return ImageChunk(CHUNK_NORMAL, 0, &file_content_, file_content_.size()); +} + +const ImageChunk* ZipModeImage::FindChunkByName(const std::string& name, bool find_normal) const { + if (name.empty()) { + return nullptr; + } + for (auto& chunk : chunks_) { + if ((chunk.GetType() == CHUNK_DEFLATE || find_normal) && chunk.GetEntryName() == name) { + return &chunk; + } + } + return nullptr; +} + +ImageChunk* ZipModeImage::FindChunkByName(const std::string& name, bool find_normal) { + return const_cast( + static_cast(this)->FindChunkByName(name, find_normal)); +} + bool ZipModeImage::CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image) { for (auto& tgt_chunk : *tgt_image) { if (tgt_chunk.GetType() != CHUNK_DEFLATE) { @@ -981,25 +795,6 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI return PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd); } -class ImageModeImage : public Image { - public: - explicit ImageModeImage(bool is_source) : Image(is_source) {} - - // Initialize the image chunks list by searching the magic numbers in an image file. - bool Initialize(const std::string& filename) override; - - bool SetBonusData(const std::vector& bonus_data); - - // In Image Mode, verify that the source and target images have the same chunk structure (ie, the - // same sequence of deflate and normal chunks). 
- static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image); - - // In image mode, generate patches against the given source chunks and bonus_data; write the - // result to |patch_name|. - static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image, - const std::string& patch_name); -}; - bool ImageModeImage::Initialize(const std::string& filename) { if (!ReadFile(filename, &file_content_)) { return false; -- cgit v1.2.3 From 2903cddb58f6ee99116e0751a2305f75f9a86461 Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Fri, 18 Aug 2017 18:15:47 -0700 Subject: Improve imgdiff for large zip files Due to the cache size limit for OTA generation, we used to split large zip files linearly into pieces and do bsdiff on them. As a result, i) we lose the advantage of imgdiff; ii) if there's an accidental order change of some huge files inside the zip, we'll create an insanely large patch. This patch splits the src&tgt more smartly based on the zip entry_name. If the entry_name is empty or no matching source is found for a target chunk, we'll skip adding its source and later do a bsdiff against the whole split source image (this rarely happens in our use cases except for the metadata inside a ziparchive). After the split, the target pieces are continuous and block aligned, while the source pieces are mutually exclusive. (Some of the source blocks may not be used if there's no matching entry_name in the target.) Then we will generate patches accordingly between each pair of split images. Afterwards, if we apply imgpatch to each pair of split source/target images and add up the patched result, we can get back the original target image. 
For example: Input: [src_image, tgt_image] Split: [src-0,tgt-0; src-1,tgt-1, src-2,tgt-2] Diff: [ patch-0; patch-1; patch-2] Patch: [(src-0,patch-0)=tgt-0; (src-1,patch-1)=tgt-1; (src-2,patch-2)=tgt-2;] Append: [tgt-0 + tgt-1 + tgt-2 = tgt_image] Performance: For the small package in b/34220646, we decrease the patch size of chrome.apk dramatically from 30M to 400K due to the order change of two big .so files. On two versions of angler, I also observe a decent patch size decrease. For chrome.apk, we reduced the size from 5.9M to 3.2M; and for vevlet.apk from 8.0M to 6.5M. Bug: 34220646 Test: recovery component test && apply imgdiff & imgpatch on two chrome.apk Change-Id: I145d802984fa805efbbac9d01a2e64d82ef9728b --- applypatch/imgdiff.cpp | 441 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 423 insertions(+), 18 deletions(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index 59b600713..2eb618fbf 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -125,6 +125,7 @@ #include #include +#include #include #include #include @@ -139,16 +140,19 @@ #include #include #include +#include #include #include #include #include #include "applypatch/imgdiff_image.h" +#include "rangeset.h" using android::base::get_unaligned; -static constexpr auto BUFFER_SIZE = 0x8000; +static constexpr size_t BLOCK_SIZE = 4096; +static constexpr size_t BUFFER_SIZE = 0x8000; // If we use this function to write the offset and length (type size_t), their values should not // exceed 2^63; because the signed bit will be casted away. @@ -162,6 +166,67 @@ static inline bool Write4(int fd, int32_t value) { return android::base::WriteFully(fd, &value, sizeof(int32_t)); } +// Trim the head or tail to align with the block size. Return false if the chunk has nothing left +// after alignment. +static bool AlignHead(size_t* start, size_t* length) { + size_t residual = (*start % BLOCK_SIZE == 0) ? 
0 : BLOCK_SIZE - *start % BLOCK_SIZE; + + if (*length <= residual) { + *length = 0; + return false; + } + + // Trim the data in the beginning. + *start += residual; + *length -= residual; + return true; +} + +static bool AlignTail(size_t* start, size_t* length) { + size_t residual = (*start + *length) % BLOCK_SIZE; + if (*length <= residual) { + *length = 0; + return false; + } + + // Trim the data in the end. + *length -= residual; + return true; +} + +// Remove the used blocks from the source chunk to make sure the source ranges are mutually +// exclusive after split. Return false if we fail to get the non-overlapped ranges. In such +// a case, we'll skip the entire source chunk. +static bool RemoveUsedBlocks(size_t* start, size_t* length, const SortedRangeSet& used_ranges) { + if (!used_ranges.Overlaps(*start, *length)) { + return true; + } + + // TODO find the largest non-overlap chunk. + printf("Removing block %s from %zu - %zu\n", used_ranges.ToString().c_str(), *start, + *start + *length - 1); + + // If there's no duplicate entry name, we should only overlap in the head or tail block. Try to + // trim both blocks. Skip this source chunk in case it still overlaps with the used ranges. 
+ if (AlignHead(start, length) && !used_ranges.Overlaps(*start, *length)) { + return true; + } + if (AlignTail(start, length) && !used_ranges.Overlaps(*start, *length)) { + return true; + } + + printf("Failed to remove the overlapped block ranges; skip the source\n"); + return false; +} + +static const struct option OPTIONS[] = { + { "zip-mode", no_argument, nullptr, 'z' }, + { "bonus-file", required_argument, nullptr, 'b' }, + { "block-limit", required_argument, nullptr, 0 }, + { "debug-dir", required_argument, nullptr, 0 }, + { nullptr, 0, nullptr, 0 }, +}; + ImageChunk::ImageChunk(int type, size_t start, const std::vector* file_content, size_t raw_data_len, std::string entry_name) : type_(type), @@ -371,6 +436,12 @@ bool PatchChunk::RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size) { return (tgt.GetType() == CHUNK_NORMAL && (target_len <= 160 || target_len < patch_size)); } +void PatchChunk::UpdateSourceOffset(const SortedRangeSet& src_range) { + if (type_ == CHUNK_DEFLATE) { + source_start_ = src_range.GetOffsetInRangeSet(source_start_); + } +} + // Header size: // header_type 4 bytes // CHUNK_NORMAL 8*3 = 24 bytes @@ -572,7 +643,7 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl ZipString name; ZipEntry entry; while ((ret = Next(cookie, &entry, &name)) == 0) { - if (entry.method == kCompressDeflated) { + if (entry.method == kCompressDeflated || limit_ > 0) { std::string entry_name(name.name, name.name + name.name_length); temp_entries.emplace_back(entry_name, entry); } @@ -595,6 +666,17 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl return false; } } + + // Add the end of zip file (mainly central directory) as a normal chunk. 
+ size_t entries_end = 0; + if (!temp_entries.empty()) { + entries_end = static_cast(temp_entries.back().second.offset + + temp_entries.back().second.compressed_length); + } + CHECK_LT(entries_end, file_content_.size()); + chunks_.emplace_back(CHUNK_NORMAL, entries_end, &file_content_, + file_content_.size() - entries_end); + return true; } @@ -635,7 +717,21 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl bool ZipModeImage::AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry) { size_t compressed_len = entry->compressed_length; - if (entry->method == kCompressDeflated) { + if (compressed_len == 0) return true; + + // Split the entry into several normal chunks if it's too large. + if (limit_ > 0 && compressed_len > limit_) { + int count = 0; + while (compressed_len > 0) { + size_t length = std::min(limit_, compressed_len); + std::string name = entry_name + "-" + std::to_string(count); + chunks_.emplace_back(CHUNK_NORMAL, entry->offset + limit_ * count, &file_content_, length, + name); + + count++; + compressed_len -= length; + } + } else if (entry->method == kCompressDeflated) { size_t uncompressed_len = entry->uncompressed_length; std::vector uncompressed_data(uncompressed_len); int ret = ExtractToMemory(handle, entry, uncompressed_data.data(), uncompressed_len); @@ -697,10 +793,23 @@ const ImageChunk* ZipModeImage::FindChunkByName(const std::string& name, bool fi return nullptr; } for (auto& chunk : chunks_) { - if ((chunk.GetType() == CHUNK_DEFLATE || find_normal) && chunk.GetEntryName() == name) { + if (chunk.GetType() != CHUNK_DEFLATE && !find_normal) { + continue; + } + + if (chunk.GetEntryName() == name) { return &chunk; } + + // Edge case when target chunk is split due to size limit but source chunk isn't. + if (name == (chunk.GetEntryName() + "-0") || chunk.GetEntryName() == (name + "-0")) { + return &chunk; + } + + // TODO handle the .so files with incremental version number. 
+ // (e.g. lib/arm64-v8a/libcronet.59.0.3050.4.so) } + return nullptr; } @@ -738,24 +847,214 @@ bool ZipModeImage::CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* // For zips, we only need merge normal chunks for the target: deflated chunks are matched via // filename, and normal chunks are patched using the entire source file as the source. - tgt_image->MergeAdjacentNormalChunks(); - tgt_image->DumpChunks(); + if (tgt_image->limit_ == 0) { + tgt_image->MergeAdjacentNormalChunks(); + tgt_image->DumpChunks(); + } return true; } -bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, - const std::string& patch_name) { +// For each target chunk, look for the corresponding source chunk by the zip_entry name. If +// found, add the range of this chunk in the original source file to the block aligned source +// ranges. Construct the split src & tgt image once the size of source range reaches limit. +bool ZipModeImage::SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector* split_tgt_images, + std::vector* split_src_images, + std::vector* split_src_ranges) { + CHECK_EQ(tgt_image.limit_, src_image.limit_); + size_t limit = tgt_image.limit_; + + src_image.DumpChunks(); + printf("Splitting %zu tgt chunks...\n", tgt_image.NumOfChunks()); + + SortedRangeSet used_src_ranges; // ranges used for previous split source images. + + // Reserve the central directory in advance for the last split image. 
+ const auto& central_directory = src_image.cend() - 1; + CHECK_EQ(CHUNK_NORMAL, central_directory->GetType()); + used_src_ranges.Insert(central_directory->GetStartOffset(), + central_directory->DataLengthForPatch()); + + SortedRangeSet src_ranges; + std::vector split_src_chunks; + std::vector split_tgt_chunks; + for (auto tgt = tgt_image.cbegin(); tgt != tgt_image.cend(); tgt++) { + const ImageChunk* src = src_image.FindChunkByName(tgt->GetEntryName(), true); + if (src == nullptr) { + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + continue; + } + + size_t src_offset = src->GetStartOffset(); + size_t src_length = src->GetRawDataLength(); + + CHECK(src_length > 0); + CHECK_LE(src_length, limit); + + // Make sure this source range hasn't been used before so that the src_range pieces don't + // overlap with each other. + if (!RemoveUsedBlocks(&src_offset, &src_length, used_src_ranges)) { + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + } else if (src_ranges.blocks() * BLOCK_SIZE + src_length <= limit) { + src_ranges.Insert(src_offset, src_length); + + // Add the deflate source chunk if it hasn't been aligned. 
+ if (src->GetType() == CHUNK_DEFLATE && src_length == src->GetRawDataLength()) { + split_src_chunks.push_back(*src); + split_tgt_chunks.push_back(*tgt); + } else { + // TODO split smarter to avoid alignment of large deflate chunks + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + } + } else { + ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, split_tgt_chunks, + split_src_chunks, split_tgt_images, + split_src_images); + + split_tgt_chunks.clear(); + split_src_chunks.clear(); + used_src_ranges.Insert(src_ranges); + split_src_ranges->push_back(std::move(src_ranges)); + src_ranges.Clear(); + + // We don't have enough space for the current chunk; start a new split image and handle + // this chunk there. + tgt--; + } + } + + // TODO Trim it in case the CD exceeds limit too much. + src_ranges.Insert(central_directory->GetStartOffset(), central_directory->DataLengthForPatch()); + ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, split_tgt_chunks, + split_src_chunks, split_tgt_images, split_src_images); + split_src_ranges->push_back(std::move(src_ranges)); + + ValidateSplitImages(*split_tgt_images, *split_src_images, *split_src_ranges, + tgt_image.file_content_.size()); + + return true; +} + +void ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + const SortedRangeSet& split_src_ranges, + const std::vector& split_tgt_chunks, + const std::vector& split_src_chunks, + std::vector* split_tgt_images, + std::vector* split_src_images) { + CHECK(!split_tgt_chunks.empty()); + // Target chunks should occupy at least one block. + // TODO put a warning and change the type to raw if it happens in extremely rare cases. 
+ size_t tgt_size = split_tgt_chunks.back().GetStartOffset() + + split_tgt_chunks.back().DataLengthForPatch() - + split_tgt_chunks.front().GetStartOffset(); + CHECK_GE(tgt_size, BLOCK_SIZE); + + std::vector aligned_tgt_chunks; + + // Align the target chunks in the beginning with BLOCK_SIZE. + size_t i = 0; + while (i < split_tgt_chunks.size()) { + size_t tgt_start = split_tgt_chunks[i].GetStartOffset(); + size_t tgt_length = split_tgt_chunks[i].GetRawDataLength(); + + // Current ImageChunk is long enough to align. + if (AlignHead(&tgt_start, &tgt_length)) { + aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt_start, &tgt_image.file_content_, + tgt_length); + break; + } + + i++; + } + CHECK_LT(i, split_tgt_chunks.size()); + aligned_tgt_chunks.insert(aligned_tgt_chunks.end(), split_tgt_chunks.begin() + i + 1, + split_tgt_chunks.end()); + CHECK(!aligned_tgt_chunks.empty()); + + // Add a normal chunk to align the contents in the end. + size_t end_offset = + aligned_tgt_chunks.back().GetStartOffset() + aligned_tgt_chunks.back().GetRawDataLength(); + if (end_offset % BLOCK_SIZE != 0 && end_offset < tgt_image.file_content_.size()) { + aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, end_offset, &tgt_image.file_content_, + BLOCK_SIZE - (end_offset % BLOCK_SIZE)); + } + + ZipModeImage split_tgt_image(false); + split_tgt_image.Initialize(std::move(aligned_tgt_chunks), {}); + split_tgt_image.MergeAdjacentNormalChunks(); + + // Construct the dummy source file based on the src_ranges. + std::vector src_content; + for (const auto& r : split_src_ranges) { + size_t end = std::min(src_image.file_content_.size(), r.second * BLOCK_SIZE); + src_content.insert(src_content.end(), src_image.file_content_.begin() + r.first * BLOCK_SIZE, + src_image.file_content_.begin() + end); + } + + // We should not have an empty src in our design; otherwise we will encounter an error in + // bsdiff since src_content.data() == nullptr. 
+ CHECK(!src_content.empty()); + + ZipModeImage split_src_image(true); + split_src_image.Initialize(split_src_chunks, std::move(src_content)); + + split_tgt_images->push_back(std::move(split_tgt_image)); + split_src_images->push_back(std::move(split_src_image)); +} + +void ZipModeImage::ValidateSplitImages(const std::vector& split_tgt_images, + const std::vector& split_src_images, + std::vector& split_src_ranges, + size_t total_tgt_size) { + CHECK_EQ(split_tgt_images.size(), split_src_images.size()); + + printf("Validating %zu images\n", split_tgt_images.size()); + + // Verify that the target image pieces is continuous and can add up to the total size. + size_t last_offset = 0; + for (const auto& tgt_image : split_tgt_images) { + CHECK(!tgt_image.chunks_.empty()); + + CHECK_EQ(last_offset, tgt_image.chunks_.front().GetStartOffset()); + CHECK(last_offset % BLOCK_SIZE == 0); + + // Check the target chunks within the split image are continuous. + for (const auto& chunk : tgt_image.chunks_) { + CHECK_EQ(last_offset, chunk.GetStartOffset()); + last_offset += chunk.GetRawDataLength(); + } + } + CHECK_EQ(total_tgt_size, last_offset); + + // Verify that the source ranges are mutually exclusive. 
+ CHECK_EQ(split_src_images.size(), split_src_ranges.size()); + SortedRangeSet used_src_ranges; + for (size_t i = 0; i < split_src_ranges.size(); i++) { + CHECK(!used_src_ranges.Overlaps(split_src_ranges[i])) + << "src range " << split_src_ranges[i].ToString() << " overlaps " + << used_src_ranges.ToString(); + used_src_ranges.Insert(split_src_ranges[i]); + } +} + +bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector* patch_chunks) { printf("Construct patches for %zu chunks...\n", tgt_image.NumOfChunks()); - std::vector patch_chunks; - patch_chunks.reserve(tgt_image.NumOfChunks()); + patch_chunks->clear(); saidx_t* bsdiff_cache = nullptr; for (size_t i = 0; i < tgt_image.NumOfChunks(); i++) { const auto& tgt_chunk = tgt_image[i]; if (PatchChunk::RawDataIsSmaller(tgt_chunk, 0)) { - patch_chunks.emplace_back(tgt_chunk); + patch_chunks->emplace_back(tgt_chunk); continue; } @@ -776,13 +1075,23 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI tgt_chunk.GetRawDataLength()); if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { - patch_chunks.emplace_back(tgt_chunk); + patch_chunks->emplace_back(tgt_chunk); } else { - patch_chunks.emplace_back(tgt_chunk, src_ref, std::move(patch_data)); + patch_chunks->emplace_back(tgt_chunk, src_ref, std::move(patch_data)); } } free(bsdiff_cache); + CHECK_EQ(patch_chunks->size(), tgt_image.NumOfChunks()); + return true; +} + +bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + const std::string& patch_name) { + std::vector patch_chunks; + + ZipModeImage::GeneratePatchesInternal(tgt_image, src_image, &patch_chunks); + CHECK_EQ(tgt_image.NumOfChunks(), patch_chunks.size()); android::base::unique_fd patch_fd( @@ -795,6 +1104,66 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI return PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd); } +bool 
ZipModeImage::GeneratePatches(const std::vector& split_tgt_images, + const std::vector& split_src_images, + const std::vector& split_src_ranges, + const std::string& patch_name, const std::string& debug_dir) { + printf("Construct patches for %zu split images...\n", split_tgt_images.size()); + + android::base::unique_fd patch_fd( + open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + if (patch_fd == -1) { + printf("failed to open \"%s\": %s\n", patch_name.c_str(), strerror(errno)); + return false; + } + + for (size_t i = 0; i < split_tgt_images.size(); i++) { + std::vector patch_chunks; + if (!ZipModeImage::GeneratePatchesInternal(split_tgt_images[i], split_src_images[i], + &patch_chunks)) { + printf("failed to generate split patch\n"); + return false; + } + + for (auto& p : patch_chunks) { + p.UpdateSourceOffset(split_src_ranges[i]); + } + + if (!PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd)) { + return false; + } + + // Write the split source & patch into the debug directory. 
+ if (!debug_dir.empty()) { + std::string src_name = android::base::StringPrintf("%s/src-%zu", debug_dir.c_str(), i); + android::base::unique_fd fd( + open(src_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + + if (fd == -1) { + printf("Failed to open %s\n", src_name.c_str()); + return false; + } + if (!android::base::WriteFully(fd, split_src_images[i].PseudoSource().DataForPatch(), + split_src_images[i].PseudoSource().DataLengthForPatch())) { + printf("Failed to write split source data into %s\n", src_name.c_str()); + return false; + } + + std::string patch_name = android::base::StringPrintf("%s/patch-%zu", debug_dir.c_str(), i); + fd.reset(open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + + if (fd == -1) { + printf("Failed to open %s\n", patch_name.c_str()); + return false; + } + if (!PatchChunk::WritePatchDataToFd(patch_chunks, fd)) { + return false; + } + } + } + return true; +} + bool ImageModeImage::Initialize(const std::string& filename) { if (!ReadFile(filename, &file_content_)) { return false; @@ -1026,11 +1395,14 @@ bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, int imgdiff(int argc, const char** argv) { bool zip_mode = false; std::vector bonus_data; + size_t blocks_limit = 0; + std::string debug_dir; int opt; + int option_index; optind = 1; // Reset the getopt state so that we can call it multiple times for test. 
- while ((opt = getopt(argc, const_cast(argv), "zb:")) != -1) { + while ((opt = getopt_long(argc, const_cast(argv), "zb:", OPTIONS, &option_index)) != -1) { switch (opt) { case 'z': zip_mode = true; @@ -1055,6 +1427,16 @@ int imgdiff(int argc, const char** argv) { } break; } + case 0: { + std::string name = OPTIONS[option_index].name; + if (name == "block-limit" && !android::base::ParseUint(optarg, &blocks_limit)) { + printf("failed to parse size blocks_limit: %s\n", optarg); + return 1; + } else if (name == "debug-dir") { + debug_dir = optarg; + } + break; + } default: printf("unexpected opt: %s\n", optarg); return 2; @@ -1062,13 +1444,20 @@ int imgdiff(int argc, const char** argv) { } if (argc - optind != 3) { - printf("usage: %s [-z] [-b ] \n", argv[0]); + printf("usage: %s [options] \n", argv[0]); + printf( + " -z , Generate patches in zip mode, src and tgt should be zip files.\n" + " -b , Bonus file in addition to src, image mode only.\n" + " --block-limit, For large zips, split the src and tgt based on the block limit;\n" + " and generate patches between each pair of pieces. Concatenate these\n" + " patches together and output them into .\n" + " --debug_dir, Debug directory to put the split srcs and patches, zip mode only.\n"); return 2; } if (zip_mode) { - ZipModeImage src_image(true); - ZipModeImage tgt_image(false); + ZipModeImage src_image(true, blocks_limit * BLOCK_SIZE); + ZipModeImage tgt_image(false, blocks_limit * BLOCK_SIZE); if (!src_image.Initialize(argv[optind])) { return 1; @@ -1080,9 +1469,25 @@ int imgdiff(int argc, const char** argv) { if (!ZipModeImage::CheckAndProcessChunks(&tgt_image, &src_image)) { return 1; } + + // TODO save and output the split information so that caller can create split transfer lists + // accordingly. + // Compute bsdiff patches for each chunk's data (the uncompressed data, in the case of // deflate chunks). 
- if (!ZipModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { + if (blocks_limit > 0) { + std::vector split_tgt_images; + std::vector split_src_images; + std::vector split_src_ranges; + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); + + if (!ZipModeImage::GeneratePatches(split_tgt_images, split_src_images, split_src_ranges, + argv[optind + 2], debug_dir)) { + return 1; + } + + } else if (!ZipModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { return 1; } } else { -- cgit v1.2.3 From 82582b4562bd2ffa9ebe9d25ecdc6222b053d6ef Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Thu, 31 Aug 2017 18:05:19 -0700 Subject: Output split information for imgdiff when handling large apks Add a mandatory option in imgdiff to write the split info (i.e. patch_size, tgt_size, src_ranges) to file when handling large apks. Therefore, the caller of imgdiff can create split transfers based on the info. Bug: 63542719 Test: unit tests pass Change-Id: I853d55d1f999fd576474faa81077f7307f4d856d --- applypatch/imgdiff.cpp | 189 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 129 insertions(+), 60 deletions(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index 2eb618fbf..c887a854d 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -15,53 +15,44 @@ */ /* - * This program constructs binary patches for images -- such as boot.img - * and recovery.img -- that consist primarily of large chunks of gzipped - * data interspersed with uncompressed data. Doing a naive bsdiff of - * these files is not useful because small changes in the data lead to - * large changes in the compressed bitstream; bsdiff patches of gzipped - * data are typically as large as the data itself. 
+ * This program constructs binary patches for images -- such as boot.img and recovery.img -- that + * consist primarily of large chunks of gzipped data interspersed with uncompressed data. Doing a + * naive bsdiff of these files is not useful because small changes in the data lead to large + * changes in the compressed bitstream; bsdiff patches of gzipped data are typically as large as + * the data itself. * - * To patch these usefully, we break the source and target images up into - * chunks of two types: "normal" and "gzip". Normal chunks are simply - * patched using a plain bsdiff. Gzip chunks are first expanded, then a - * bsdiff is applied to the uncompressed data, then the patched data is - * gzipped using the same encoder parameters. Patched chunks are - * concatenated together to create the output file; the output image - * should be *exactly* the same series of bytes as the target image used - * originally to generate the patch. + * To patch these usefully, we break the source and target images up into chunks of two types: + * "normal" and "gzip". Normal chunks are simply patched using a plain bsdiff. Gzip chunks are + * first expanded, then a bsdiff is applied to the uncompressed data, then the patched data is + * gzipped using the same encoder parameters. Patched chunks are concatenated together to create + * the output file; the output image should be *exactly* the same series of bytes as the target + * image used originally to generate the patch. * - * To work well with this tool, the gzipped sections of the target - * image must have been generated using the same deflate encoder that - * is available in applypatch, namely, the one in the zlib library. - * In practice this means that images should be compressed using the - * "minigzip" tool included in the zlib distribution, not the GNU gzip - * program. 
+ * To work well with this tool, the gzipped sections of the target image must have been generated + * using the same deflate encoder that is available in applypatch, namely, the one in the zlib + * library. In practice this means that images should be compressed using the "minigzip" tool + * included in the zlib distribution, not the GNU gzip program. * - * An "imgdiff" patch consists of a header describing the chunk structure - * of the file and any encoding parameters needed for the gzipped - * chunks, followed by N bsdiff patches, one per chunk. + * An "imgdiff" patch consists of a header describing the chunk structure of the file and any + * encoding parameters needed for the gzipped chunks, followed by N bsdiff patches, one per chunk. * - * For a diff to be generated, the source and target images must have the - * same "chunk" structure: that is, the same number of gzipped and normal - * chunks in the same order. Android boot and recovery images currently - * consist of five chunks: a small normal header, a gzipped kernel, a - * small normal section, a gzipped ramdisk, and finally a small normal - * footer. + * For a diff to be generated, the source and target must be in well-formed zip archive format; + * or they are image files with the same "chunk" structure: that is, the same number of gzipped and + * normal chunks in the same order. Android boot and recovery images currently consist of five + * chunks: a small normal header, a gzipped kernel, a small normal section, a gzipped ramdisk, and + * finally a small normal footer. * - * Caveats: we locate gzipped sections within the source and target - * images by searching for the byte sequence 1f8b0800: 1f8b is the gzip - * magic number; 08 specifies the "deflate" encoding [the only encoding - * supported by the gzip standard]; and 00 is the flags byte. We do not - * currently support any extra header fields (which would be indicated by - * a nonzero flags byte). 
We also don't handle the case when that byte - * sequence appears spuriously in the file. (Note that it would have to - * occur spuriously within a normal chunk to be a problem.) + * Caveats: we locate gzipped sections within the source and target images by searching for the + * byte sequence 1f8b0800: 1f8b is the gzip magic number; 08 specifies the "deflate" encoding + * [the only encoding supported by the gzip standard]; and 00 is the flags byte. We do not + * currently support any extra header fields (which would be indicated by a nonzero flags byte). + * We also don't handle the case when that byte sequence appears spuriously in the file. (Note + * that it would have to occur spuriously within a normal chunk to be a problem.) * * * The imgdiff patch header looks like this: * - * "IMGDIFF1" (8) [magic number and version] + * "IMGDIFF2" (8) [magic number and version] * chunk count (4) * for each chunk: * chunk type (4) [CHUNK_{NORMAL, GZIP, DEFLATE, RAW}] @@ -98,27 +89,55 @@ * target len (4) * data (target len) * - * All integers are little-endian. "source start" and "source len" - * specify the section of the input image that comprises this chunk, - * including the gzip header and footer for gzip chunks. "source - * expanded len" is the size of the uncompressed source data. "target - * expected len" is the size of the uncompressed data after applying - * the bsdiff patch. The next five parameters specify the zlib - * parameters to be used when compressing the patched data, and the - * next three specify the header and footer to be wrapped around the - * compressed data to create the output chunk (so that header contents - * like the timestamp are recreated exactly). + * All integers are little-endian. "source start" and "source len" specify the section of the + * input image that comprises this chunk, including the gzip header and footer for gzip chunks. + * "source expanded len" is the size of the uncompressed source data. 
"target expected len" is the + * size of the uncompressed data after applying the bsdiff patch. The next five parameters + * specify the zlib parameters to be used when compressing the patched data, and the next three + * specify the header and footer to be wrapped around the compressed data to create the output + * chunk (so that header contents like the timestamp are recreated exactly). * - * After the header there are 'chunk count' bsdiff patches; the offset - * of each from the beginning of the file is specified in the header. + * After the header there are 'chunk count' bsdiff patches; the offset of each from the beginning + * of the file is specified in the header. * - * This tool can take an optional file of "bonus data". This is an - * extra file of data that is appended to chunk #1 after it is - * compressed (it must be a CHUNK_DEFLATE chunk). The same file must - * be available (and passed to applypatch with -b) when applying the - * patch. This is used to reduce the size of recovery-from-boot - * patches by combining the boot image with recovery ramdisk + * This tool can take an optional file of "bonus data". This is an extra file of data that is + * appended to chunk #1 after it is compressed (it must be a CHUNK_DEFLATE chunk). The same file + * must be available (and passed to applypatch with -b) when applying the patch. This is used to + * reduce the size of recovery-from-boot patches by combining the boot image with recovery ramdisk * information that is stored on the system partition. + * + * When generating the patch between two zip files, this tool has an option "--block-limit" to + * split the large source/target files into several pairs of pieces, with each piece having at most + * *limit* blocks. When this option is used, we also need to output the split info into the file + * path specified by "--split-info".
+ * + * Format of split info file: + * 2 [version of imgdiff] + * n [count of split pieces] + * <patch_size_1> <tgt_size_1> <src_range_1> [size and ranges for split piece#1] + * ... + * <patch_size_n> <tgt_size_n> <src_range_n> [size and ranges for split piece#n] + * + * To split a pair of large zip files, we walk through the chunks in target zip and search by its + * entry_name in the source zip. If the entry_name is non-empty and a matching entry in source + * is found, we'll add the source entry to the current split source image; otherwise we'll skip + * this chunk and later do bsdiff between all the skipped chunks and the whole split source image. + * We move on to the next pair of pieces if the size of the split source image reaches the block + * limit. + * + * After the split, the target pieces are continuous and block aligned, while the source pieces + * are mutually exclusive. Some of the source blocks may not be used if there's no matching + * entry_name in the target; as a result, they won't be included in any of these split source + * images. Then we will generate patches accordingly between each pair of split images; in particular, + * the unmatched chunks in the split target will diff against the entire split source image. + * + * For example: + * Input: [src_image, tgt_image] + * Split: [src-0, tgt-0; src-1, tgt-1; src-2, tgt-2] + * Diff: [ patch-0; patch-1; patch-2] + * + * Patch: [(src-0, patch-0) = tgt-0; (src-1, patch-1) = tgt-1; (src-2, patch-2) = tgt-2] + * Concatenate: [tgt-0 + tgt-1 + tgt-2 = tgt_image] */ #include "applypatch/imgdiff.h" @@ -151,6 +170,11 @@ using android::base::get_unaligned; +static constexpr size_t VERSION = 2; + +// We assume the header "IMGDIFF#" is 8 bytes. 
+static_assert(VERSION <= 9, "VERSION occupies more than one byte."); + static constexpr size_t BLOCK_SIZE = 4096; static constexpr size_t BUFFER_SIZE = 0x8000; @@ -224,6 +248,7 @@ static const struct option OPTIONS[] = { { "bonus-file", required_argument, nullptr, 'b' }, { "block-limit", required_argument, nullptr, 0 }, { "debug-dir", required_argument, nullptr, 0 }, + { "split-info", required_argument, nullptr, 0 }, { nullptr, 0, nullptr, 0 }, }; @@ -497,6 +522,13 @@ size_t PatchChunk::WriteHeaderToFd(int fd, size_t offset) const { } } +size_t PatchChunk::PatchSize() const { + if (type_ == CHUNK_RAW) { + return GetHeaderSize(); + } + return GetHeaderSize() + data_.size(); +} + // Write the contents of |patch_chunks| to |patch_fd|. bool PatchChunk::WritePatchDataToFd(const std::vector& patch_chunks, int patch_fd) { // Figure out how big the imgdiff file header is going to be, so that we can correctly compute @@ -509,8 +541,8 @@ bool PatchChunk::WritePatchDataToFd(const std::vector& patch_chunks, size_t offset = total_header_size; // Write out the headers. 
- if (!android::base::WriteStringToFd("IMGDIFF2", patch_fd)) { - printf("failed to write \"IMGDIFF2\": %s\n", strerror(errno)); + if (!android::base::WriteStringToFd("IMGDIFF" + std::to_string(VERSION), patch_fd)) { + printf("failed to write \"IMGDIFF%zu\": %s\n", VERSION, strerror(errno)); return false; } @@ -1107,7 +1139,9 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_images, const std::vector& split_src_images, const std::vector& split_src_ranges, - const std::string& patch_name, const std::string& debug_dir) { + const std::string& patch_name, + const std::string& split_info_file, + const std::string& debug_dir) { printf("Construct patches for %zu split images...\n", split_tgt_images.size()); android::base::unique_fd patch_fd( @@ -1117,6 +1151,7 @@ bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_im return false; } + std::vector split_info_list; for (size_t i = 0; i < split_tgt_images.size(); i++) { std::vector patch_chunks; if (!ZipModeImage::GeneratePatchesInternal(split_tgt_images[i], split_src_images[i], @@ -1125,14 +1160,23 @@ bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_im return false; } + size_t total_patch_size = 12; for (auto& p : patch_chunks) { p.UpdateSourceOffset(split_src_ranges[i]); + total_patch_size += p.PatchSize(); } if (!PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd)) { return false; } + size_t split_tgt_size = split_tgt_images[i].chunks_.back().GetStartOffset() + + split_tgt_images[i].chunks_.back().GetRawDataLength() - + split_tgt_images[i].chunks_.front().GetStartOffset(); + std::string split_info = android::base::StringPrintf( + "%zu %zu %s", total_patch_size, split_tgt_size, split_src_ranges[i].ToString().c_str()); + split_info_list.push_back(split_info); + // Write the split source & patch into the debug directory. 
if (!debug_dir.empty()) { std::string src_name = android::base::StringPrintf("%s/src-%zu", debug_dir.c_str(), i); @@ -1161,6 +1205,21 @@ bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_im } } } + + // Store the split in the following format: + // Line 0: imgdiff version# + // Line 1: number of pieces + // Line 2: patch_size_1 tgt_size_1 src_range_1 + // ... + // Line n+1: patch_size_n tgt_size_n src_range_n + std::string split_info_string = android::base::StringPrintf( + "%zu\n%zu\n", VERSION, split_info_list.size()) + android::base::Join(split_info_list, '\n'); + if (!android::base::WriteStringToFile(split_info_string, split_info_file)) { + printf("failed to write split info to \"%s\": %s\n", split_info_file.c_str(), + strerror(errno)); + return false; + } + return true; } @@ -1396,6 +1455,7 @@ int imgdiff(int argc, const char** argv) { bool zip_mode = false; std::vector bonus_data; size_t blocks_limit = 0; + std::string split_info_file; std::string debug_dir; int opt; @@ -1432,6 +1492,8 @@ int imgdiff(int argc, const char** argv) { if (name == "block-limit" && !android::base::ParseUint(optarg, &blocks_limit)) { printf("failed to parse size blocks_limit: %s\n", optarg); return 1; + } else if (name == "split-info") { + split_info_file = optarg; } else if (name == "debug-dir") { debug_dir = optarg; } @@ -1451,6 +1513,8 @@ int imgdiff(int argc, const char** argv) { " --block-limit, For large zips, split the src and tgt based on the block limit;\n" " and generate patches between each pair of pieces. Concatenate these\n" " patches together and output them into .\n" + " --split-info, Output the split information (patch_size, tgt_size, src_ranges);\n" + " zip mode with block-limit only.\n" " --debug_dir, Debug directory to put the split srcs and patches, zip mode only.\n"); return 2; } @@ -1476,6 +1540,11 @@ int imgdiff(int argc, const char** argv) { // Compute bsdiff patches for each chunk's data (the uncompressed data, in the case of // deflate chunks). 
if (blocks_limit > 0) { + if (split_info_file.empty()) { + printf("split-info path cannot be empty when generating patches with a block-limit.\n"); + return 1; + } + std::vector split_tgt_images; std::vector split_src_images; std::vector split_src_ranges; @@ -1483,7 +1552,7 @@ int imgdiff(int argc, const char** argv) { &split_src_images, &split_src_ranges); if (!ZipModeImage::GeneratePatches(split_tgt_images, split_src_images, split_src_ranges, - argv[optind + 2], debug_dir)) { + argv[optind + 2], split_info_file, debug_dir)) { return 1; } -- cgit v1.2.3 From 09e468f84cc245fba61d69165b4af8f1ec4cdfd5 Mon Sep 17 00:00:00 2001 From: Tao Bao Date: Fri, 29 Sep 2017 14:39:33 -0700 Subject: Move rangeset.h and print_sha1.h into otautil. Also drop the "bootable/recovery" path in LOCAL_C_INCLUDES from applypatch modules. Test: lunch aosp_{angler,bullhead,fugu,dragon,sailfish}-userdebug; mmma bootable/recovery Change-Id: Idd602a796894f971ee4f8fa3eafe36c42d9de986 --- applypatch/imgdiff.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index c887a854d..3a61a7d0d 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -166,7 +166,7 @@ #include #include "applypatch/imgdiff_image.h" -#include "rangeset.h" +#include "otautil/rangeset.h" using android::base::get_unaligned; -- cgit v1.2.3 From 45685820029fb191fe8509418df91a049227ea3a Mon Sep 17 00:00:00 2001 From: Tao Bao Date: Fri, 13 Oct 2017 14:54:12 -0700 Subject: otautil: Move RangeSet implementation into rangeset.cpp. Since it has grown much larger, users of the header shouldn't compile and carry their full copies. Also add missing header includes in imgdiff.cpp and imgdiff_test.cpp. 
Test: mmma bootable/recovery Test: recovery_unit_test; recovery_component_test; recovery_host_test Change-Id: I88ca54171765e5606ab0d61580fbc1ada578fd7d --- applypatch/imgdiff.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index 3a61a7d0d..69ad75f37 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -160,6 +160,8 @@ #include #include #include +#include +#include #include #include #include -- cgit v1.2.3 From fa188268e43ab75732a480d6b2ec748d9d0dbfae Mon Sep 17 00:00:00 2001 From: Alex Deymo Date: Tue, 10 Oct 2017 17:56:17 +0200 Subject: Use SuffixArrayIndexInterface opaque type instead of the underlying data pointer. bsdiff interface is changing such that it hides the suffix array pointer from the public interface. This allows to use a different suffix array data size depending on the input size, running much faster in the normal case. Bug: 34220646 Test: `make checkbuild`; Ran an incremental update generation on a non-A/B device. 
Change-Id: I78e766da56cf28bc7774b8c8e58527bc11d919fb --- applypatch/imgdiff.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index c887a854d..ccd68dc3e 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -161,7 +161,7 @@ #include #include #include -#include +#include #include #include @@ -322,7 +322,8 @@ void ImageChunk::MergeAdjacentNormal(const ImageChunk& other) { } bool ImageChunk::MakePatch(const ImageChunk& tgt, const ImageChunk& src, - std::vector* patch_data, saidx_t** bsdiff_cache) { + std::vector* patch_data, + bsdiff::SuffixArrayIndexInterface** bsdiff_cache) { #if defined(__ANDROID__) char ptemp[] = "/data/local/tmp/imgdiff-patch-XXXXXX"; #else @@ -1081,7 +1082,7 @@ bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, printf("Construct patches for %zu chunks...\n", tgt_image.NumOfChunks()); patch_chunks->clear(); - saidx_t* bsdiff_cache = nullptr; + bsdiff::SuffixArrayIndexInterface* bsdiff_cache = nullptr; for (size_t i = 0; i < tgt_image.NumOfChunks(); i++) { const auto& tgt_chunk = tgt_image[i]; @@ -1095,7 +1096,8 @@ bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, : src_image.FindChunkByName(tgt_chunk.GetEntryName()); const auto& src_ref = (src_chunk == nullptr) ? src_image.PseudoSource() : *src_chunk; - saidx_t** bsdiff_cache_ptr = (src_chunk == nullptr) ? &bsdiff_cache : nullptr; + bsdiff::SuffixArrayIndexInterface** bsdiff_cache_ptr = + (src_chunk == nullptr) ? 
&bsdiff_cache : nullptr; std::vector patch_data; if (!ImageChunk::MakePatch(tgt_chunk, src_ref, &patch_data, bsdiff_cache_ptr)) { @@ -1112,7 +1114,7 @@ bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, patch_chunks->emplace_back(tgt_chunk, src_ref, std::move(patch_data)); } } - free(bsdiff_cache); + delete bsdiff_cache; CHECK_EQ(patch_chunks->size(), tgt_image.NumOfChunks()); return true; -- cgit v1.2.3 From 6e293c99c1c8818fd28edc8dcfc13b5e4f048188 Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Wed, 15 Nov 2017 16:26:41 -0800 Subject: Switch imgdiff to libbase logging Also add a verbose option. And we won't print messages of 'info' severity unless '-v' is present. Test: run imgdiff and check the logs. Change-Id: I1b90874baea8e72e2a2323a0b63bc5d35e653e6b --- applypatch/imgdiff.cpp | 199 ++++++++++++++++++++++++++----------------------- 1 file changed, 107 insertions(+), 92 deletions(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index f57e7942c..3dae63d4b 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -175,7 +175,7 @@ using android::base::get_unaligned; static constexpr size_t VERSION = 2; // We assume the header "IMGDIFF#" is 8 bytes. -static_assert(VERSION <= 9, "VERSION occupies more than one byte."); +static_assert(VERSION <= 9, "VERSION occupies more than one byte"); static constexpr size_t BLOCK_SIZE = 4096; static constexpr size_t BUFFER_SIZE = 0x8000; @@ -229,8 +229,8 @@ static bool RemoveUsedBlocks(size_t* start, size_t* length, const SortedRangeSet } // TODO find the largest non-overlap chunk. - printf("Removing block %s from %zu - %zu\n", used_ranges.ToString().c_str(), *start, - *start + *length - 1); + LOG(INFO) << "Removing block " << used_ranges.ToString() << " from " << *start << " - " + << *start + *length - 1; // If there's no duplicate entry name, we should only overlap in the head or tail block. Try to // trim both blocks. 
Skip this source chunk in case it still overlaps with the used ranges. @@ -241,7 +241,7 @@ static bool RemoveUsedBlocks(size_t* start, size_t* length, const SortedRangeSet return true; } - printf("Failed to remove the overlapped block ranges; skip the source\n"); + LOG(WARNING) << "Failed to remove the overlapped block ranges; skip the source"; return false; } @@ -251,6 +251,7 @@ static const struct option OPTIONS[] = { { "block-limit", required_argument, nullptr, 0 }, { "debug-dir", required_argument, nullptr, 0 }, { "split-info", required_argument, nullptr, 0 }, + { "verbose", no_argument, nullptr, 'v' }, { nullptr, 0, nullptr, 0 }, }; @@ -284,6 +285,11 @@ size_t ImageChunk::DataLengthForPatch() const { return raw_data_len_; } +void ImageChunk::Dump(size_t index) const { + LOG(INFO) << "chunk: " << index << ", type: " << type_ << ", start: " << start_ + << ", len: " << DataLengthForPatch() << ", name: " << entry_name_; +} + bool ImageChunk::operator==(const ImageChunk& other) const { if (type_ != other.type_) { return false; @@ -334,7 +340,7 @@ bool ImageChunk::MakePatch(const ImageChunk& tgt, const ImageChunk& src, int fd = mkstemp(ptemp); if (fd == -1) { - printf("MakePatch failed to create a temporary file: %s\n", strerror(errno)); + PLOG(ERROR) << "MakePatch failed to create a temporary file"; return false; } close(fd); @@ -342,18 +348,18 @@ bool ImageChunk::MakePatch(const ImageChunk& tgt, const ImageChunk& src, int r = bsdiff::bsdiff(src.DataForPatch(), src.DataLengthForPatch(), tgt.DataForPatch(), tgt.DataLengthForPatch(), ptemp, bsdiff_cache); if (r != 0) { - printf("bsdiff() failed: %d\n", r); + LOG(ERROR) << "bsdiff() failed: " << r; return false; } android::base::unique_fd patch_fd(open(ptemp, O_RDONLY)); if (patch_fd == -1) { - printf("failed to open %s: %s\n", ptemp, strerror(errno)); + PLOG(ERROR) << "Failed to open " << ptemp; return false; } struct stat st; if (fstat(patch_fd, &st) != 0) { - printf("failed to stat patch file %s: %s\n", ptemp, 
strerror(errno)); + PLOG(ERROR) << "Failed to stat patch file " << ptemp; return false; } @@ -361,7 +367,7 @@ bool ImageChunk::MakePatch(const ImageChunk& tgt, const ImageChunk& src, patch_data->resize(sz); if (!android::base::ReadFully(patch_fd, patch_data->data(), sz)) { - printf("failed to read \"%s\" %s\n", ptemp, strerror(errno)); + PLOG(ERROR) << "Failed to read " << ptemp; unlink(ptemp); return false; } @@ -373,7 +379,7 @@ bool ImageChunk::MakePatch(const ImageChunk& tgt, const ImageChunk& src, bool ImageChunk::ReconstructDeflateChunk() { if (type_ != CHUNK_DEFLATE) { - printf("attempt to reconstruct non-deflate chunk\n"); + LOG(ERROR) << "Attempted to reconstruct non-deflate chunk"; return false; } @@ -403,7 +409,7 @@ bool ImageChunk::TryReconstruction(int level) { strm.next_in = uncompressed_data_.data(); int ret = deflateInit2(&strm, level, METHOD, WINDOWBITS, MEMLEVEL, STRATEGY); if (ret < 0) { - printf("failed to initialize deflate: %d\n", ret); + LOG(ERROR) << "Failed to initialize deflate: " << ret; return false; } @@ -414,7 +420,7 @@ bool ImageChunk::TryReconstruction(int level) { strm.next_out = buffer.data(); ret = deflate(&strm, Z_FINISH); if (ret < 0) { - printf("failed to deflate: %d\n", ret); + LOG(ERROR) << "Failed to deflate: " << ret; return false; } @@ -490,17 +496,19 @@ size_t PatchChunk::GetHeaderSize() const { } // Return the offset of the next patch into the patch data. 
-size_t PatchChunk::WriteHeaderToFd(int fd, size_t offset) const { +size_t PatchChunk::WriteHeaderToFd(int fd, size_t offset, size_t index) const { Write4(fd, type_); switch (type_) { case CHUNK_NORMAL: - printf("normal (%10zu, %10zu) %10zu\n", target_start_, target_len_, data_.size()); + LOG(INFO) << android::base::StringPrintf("chunk %zu: normal (%10zu, %10zu) %10zu", index, + target_start_, target_len_, data_.size()); Write8(fd, static_cast(source_start_)); Write8(fd, static_cast(source_len_)); Write8(fd, static_cast(offset)); return offset + data_.size(); case CHUNK_DEFLATE: - printf("deflate (%10zu, %10zu) %10zu\n", target_start_, target_len_, data_.size()); + LOG(INFO) << android::base::StringPrintf("chunk %zu: deflate (%10zu, %10zu) %10zu", index, + target_start_, target_len_, data_.size()); Write8(fd, static_cast(source_start_)); Write8(fd, static_cast(source_len_)); Write8(fd, static_cast(offset)); @@ -513,10 +521,11 @@ size_t PatchChunk::WriteHeaderToFd(int fd, size_t offset) const { Write4(fd, ImageChunk::STRATEGY); return offset + data_.size(); case CHUNK_RAW: - printf("raw (%10zu, %10zu)\n", target_start_, target_len_); + LOG(INFO) << android::base::StringPrintf("chunk %zu: raw (%10zu, %10zu)", index, + target_start_, target_len_); Write4(fd, static_cast(data_.size())); if (!android::base::WriteFully(fd, data_.data(), data_.size())) { - CHECK(false) << "failed to write " << data_.size() << " bytes patch"; + CHECK(false) << "Failed to write " << data_.size() << " bytes patch"; } return offset; default: @@ -545,14 +554,14 @@ bool PatchChunk::WritePatchDataToFd(const std::vector& patch_chunks, // Write out the headers. 
if (!android::base::WriteStringToFd("IMGDIFF" + std::to_string(VERSION), patch_fd)) { - printf("failed to write \"IMGDIFF%zu\": %s\n", VERSION, strerror(errno)); + PLOG(ERROR) << "Failed to write \"IMGDIFF" << VERSION << "\""; return false; } Write4(patch_fd, static_cast(patch_chunks.size())); + LOG(INFO) << "Writing " << patch_chunks.size() << " patch headers..."; for (size_t i = 0; i < patch_chunks.size(); ++i) { - printf("chunk %zu: ", i); - offset = patch_chunks[i].WriteHeaderToFd(patch_fd, offset); + offset = patch_chunks[i].WriteHeaderToFd(patch_fd, offset, i); } // Append each chunk's bsdiff patch, in order. @@ -561,7 +570,7 @@ bool PatchChunk::WritePatchDataToFd(const std::vector& patch_chunks, continue; } if (!android::base::WriteFully(patch_fd, patch.data_.data(), patch.data_.size())) { - printf("failed to write %zu bytes patch to patch_fd\n", patch.data_.size()); + PLOG(ERROR) << "Failed to write " << patch.data_.size() << " bytes patch to patch_fd"; return false; } } @@ -603,10 +612,9 @@ void Image::MergeAdjacentNormalChunks() { void Image::DumpChunks() const { std::string type = is_source_ ? 
"source" : "target"; - printf("Dumping chunks for %s\n", type.c_str()); + LOG(INFO) << "Dumping chunks for " << type; for (size_t i = 0; i < chunks_.size(); ++i) { - printf("chunk %zu: ", i); - chunks_[i].Dump(); + chunks_[i].Dump(i); } } @@ -615,19 +623,19 @@ bool Image::ReadFile(const std::string& filename, std::vector* file_con android::base::unique_fd fd(open(filename.c_str(), O_RDONLY)); if (fd == -1) { - printf("failed to open \"%s\" %s\n", filename.c_str(), strerror(errno)); + PLOG(ERROR) << "Failed to open " << filename; return false; } struct stat st; if (fstat(fd, &st) != 0) { - printf("failed to stat \"%s\": %s\n", filename.c_str(), strerror(errno)); + PLOG(ERROR) << "Failed to stat " << filename; return false; } size_t sz = static_cast(st.st_size); file_content->resize(sz); if (!android::base::ReadFully(fd, file_content->data(), sz)) { - printf("failed to read \"%s\" %s\n", filename.c_str(), strerror(errno)); + PLOG(ERROR) << "Failed to read " << filename; return false; } fd.reset(); @@ -643,14 +651,14 @@ bool ZipModeImage::Initialize(const std::string& filename) { // Omit the trailing zeros before we pass the file to ziparchive handler. 
size_t zipfile_size; if (!GetZipFileSize(&zipfile_size)) { - printf("failed to parse the actual size of %s\n", filename.c_str()); + LOG(ERROR) << "Failed to parse the actual size of " << filename; return false; } ZipArchiveHandle handle; int err = OpenArchiveFromMemory(const_cast(file_content_.data()), zipfile_size, filename.c_str(), &handle); if (err != 0) { - printf("failed to open zip file %s: %s\n", filename.c_str(), ErrorCodeString(err)); + LOG(ERROR) << "Failed to open zip file " << filename << ": " << ErrorCodeString(err); CloseArchive(handle); return false; } @@ -669,7 +677,7 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl void* cookie; int ret = StartIteration(handle, &cookie, nullptr, nullptr); if (ret != 0) { - printf("failed to iterate over entries in %s: %s\n", filename.c_str(), ErrorCodeString(ret)); + LOG(ERROR) << "Failed to iterate over entries in " << filename << ": " << ErrorCodeString(ret); return false; } @@ -685,7 +693,7 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl } if (ret != -1) { - printf("Error while iterating over zip entries: %s\n", ErrorCodeString(ret)); + LOG(ERROR) << "Error while iterating over zip entries: " << ErrorCodeString(ret); return false; } std::sort(temp_entries.begin(), temp_entries.end(), @@ -697,7 +705,7 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl if (is_source_) { for (auto& entry : temp_entries) { if (!AddZipEntryToChunks(handle, entry.first, &entry.second)) { - printf("Failed to add %s to source chunks\n", entry.first.c_str()); + LOG(ERROR) << "Failed to add " << entry.first << " to source chunks"; return false; } } @@ -725,7 +733,7 @@ bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandl // Add the next zip entry. 
std::string entry_name = temp_entries[nextentry].first; if (!AddZipEntryToChunks(handle, entry_name, &temp_entries[nextentry].second)) { - printf("Failed to add %s to target chunks\n", entry_name.c_str()); + LOG(ERROR) << "Failed to add " << entry_name << " to target chunks"; return false; } @@ -771,8 +779,8 @@ bool ZipModeImage::AddZipEntryToChunks(ZipArchiveHandle handle, const std::strin std::vector uncompressed_data(uncompressed_len); int ret = ExtractToMemory(handle, entry, uncompressed_data.data(), uncompressed_len); if (ret != 0) { - printf("failed to extract %s with size %zu: %s\n", entry_name.c_str(), uncompressed_len, - ErrorCodeString(ret)); + LOG(ERROR) << "Failed to extract " << entry_name << " with size " << uncompressed_len << ": " + << ErrorCodeString(ret); return false; } ImageChunk curr(CHUNK_DEFLATE, entry->offset, &file_content_, compressed_len, entry_name); @@ -793,7 +801,7 @@ bool ZipModeImage::AddZipEntryToChunks(ZipArchiveHandle handle, const std::strin // offset 22: comment, n bytes bool ZipModeImage::GetZipFileSize(size_t* input_file_size) { if (file_content_.size() < 22) { - printf("file is too small to be a zip file\n"); + LOG(ERROR) << "File is too small to be a zip file"; return false; } @@ -872,8 +880,8 @@ bool ZipModeImage::CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* } else if (!tgt_chunk.ReconstructDeflateChunk()) { // We cannot recompress the data and get exactly the same bits as are in the input target // image. Treat the chunk as a normal non-deflated chunk. 
- printf("failed to reconstruct target deflate chunk [%s]; treating as normal\n", - tgt_chunk.GetEntryName().c_str()); + LOG(WARNING) << "Failed to reconstruct target deflate chunk [" << tgt_chunk.GetEntryName() + << "]; treating as normal"; tgt_chunk.ChangeDeflateChunkToNormal(); src_chunk->ChangeDeflateChunkToNormal(); @@ -902,7 +910,7 @@ bool ZipModeImage::SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, size_t limit = tgt_image.limit_; src_image.DumpChunks(); - printf("Splitting %zu tgt chunks...\n", tgt_image.NumOfChunks()); + LOG(INFO) << "Splitting " << tgt_image.NumOfChunks() << " tgt chunks..."; SortedRangeSet used_src_ranges; // ranges used for previous split source images. @@ -1049,7 +1057,7 @@ void ZipModeImage::ValidateSplitImages(const std::vector& split_tg size_t total_tgt_size) { CHECK_EQ(split_tgt_images.size(), split_src_images.size()); - printf("Validating %zu images\n", split_tgt_images.size()); + LOG(INFO) << "Validating " << split_tgt_images.size() << " images"; // Verify that the target image pieces is continuous and can add up to the total size. 
size_t last_offset = 0; @@ -1081,7 +1089,7 @@ void ZipModeImage::ValidateSplitImages(const std::vector& split_tg bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image, std::vector* patch_chunks) { - printf("Construct patches for %zu chunks...\n", tgt_image.NumOfChunks()); + LOG(INFO) << "Constructing patches for " << tgt_image.NumOfChunks() << " chunks..."; patch_chunks->clear(); bsdiff::SuffixArrayIndexInterface* bsdiff_cache = nullptr; @@ -1103,12 +1111,12 @@ bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, std::vector patch_data; if (!ImageChunk::MakePatch(tgt_chunk, src_ref, &patch_data, bsdiff_cache_ptr)) { - printf("Failed to generate patch, name: %s\n", tgt_chunk.GetEntryName().c_str()); + LOG(ERROR) << "Failed to generate patch, name: " << tgt_chunk.GetEntryName(); return false; } - printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data.size(), - tgt_chunk.GetRawDataLength()); + LOG(INFO) << "patch " << i << " is " << patch_data.size() << " bytes (of " + << tgt_chunk.GetRawDataLength() << ")"; if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { patch_chunks->emplace_back(tgt_chunk); @@ -1133,7 +1141,7 @@ bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeI android::base::unique_fd patch_fd( open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); if (patch_fd == -1) { - printf("failed to open \"%s\": %s\n", patch_name.c_str(), strerror(errno)); + PLOG(ERROR) << "Failed to open " << patch_name; return false; } @@ -1146,12 +1154,12 @@ bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_im const std::string& patch_name, const std::string& split_info_file, const std::string& debug_dir) { - printf("Construct patches for %zu split images...\n", split_tgt_images.size()); + LOG(INFO) << "Constructing patches for " << split_tgt_images.size() << " split images..."; android::base::unique_fd patch_fd( 
open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); if (patch_fd == -1) { - printf("failed to open \"%s\": %s\n", patch_name.c_str(), strerror(errno)); + PLOG(ERROR) << "Failed to open " << patch_name; return false; } @@ -1160,7 +1168,7 @@ bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_im std::vector patch_chunks; if (!ZipModeImage::GeneratePatchesInternal(split_tgt_images[i], split_src_images[i], &patch_chunks)) { - printf("failed to generate split patch\n"); + LOG(ERROR) << "Failed to generate split patch"; return false; } @@ -1188,12 +1196,12 @@ bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_im open(src_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); if (fd == -1) { - printf("Failed to open %s\n", src_name.c_str()); + PLOG(ERROR) << "Failed to open " << src_name; return false; } if (!android::base::WriteFully(fd, split_src_images[i].PseudoSource().DataForPatch(), split_src_images[i].PseudoSource().DataLengthForPatch())) { - printf("Failed to write split source data into %s\n", src_name.c_str()); + PLOG(ERROR) << "Failed to write split source data into " << src_name; return false; } @@ -1201,7 +1209,7 @@ bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_im fd.reset(open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); if (fd == -1) { - printf("Failed to open %s\n", patch_name.c_str()); + PLOG(ERROR) << "Failed to open " << patch_name; return false; } if (!PatchChunk::WritePatchDataToFd(patch_chunks, fd)) { @@ -1219,8 +1227,7 @@ bool ZipModeImage::GeneratePatches(const std::vector& split_tgt_im std::string split_info_string = android::base::StringPrintf( "%zu\n%zu\n", VERSION, split_info_list.size()) + android::base::Join(split_info_list, '\n'); if (!android::base::WriteStringToFile(split_info_string, split_info_file)) { - printf("failed to write split info to \"%s\": %s\n", split_info_file.c_str(), - strerror(errno)); + PLOG(ERROR) << "Failed to write 
split info to " << split_info_file; return false; } @@ -1265,7 +1272,7 @@ bool ImageModeImage::Initialize(const std::string& filename) { // not expect zlib headers. int ret = inflateInit2(&strm, -15); if (ret < 0) { - printf("failed to initialize inflate: %d\n", ret); + LOG(ERROR) << "Failed to initialize inflate: " << ret; return false; } @@ -1277,8 +1284,8 @@ bool ImageModeImage::Initialize(const std::string& filename) { strm.next_out = uncompressed_data.data() + uncompressed_len; ret = inflate(&strm, Z_NO_FLUSH); if (ret < 0) { - printf("Warning: inflate failed [%s] at offset [%zu], treating as a normal chunk\n", - strm.msg, chunk_offset); + LOG(WARNING) << "Inflate failed [" << strm.msg << "] at offset [" << chunk_offset + << "]; treating as a normal chunk"; break; } uncompressed_len = allocated - strm.avail_out; @@ -1299,13 +1306,13 @@ bool ImageModeImage::Initialize(const std::string& filename) { // matches the size of the data we got when we actually did the decompression. size_t footer_index = pos + raw_data_len + GZIP_FOOTER_LEN - 4; if (sz - footer_index < 4) { - printf("Warning: invalid footer position; treating as a nomal chunk\n"); + LOG(WARNING) << "invalid footer position; treating as a normal chunk"; continue; } size_t footer_size = get_unaligned(file_content_.data() + footer_index); if (footer_size != uncompressed_len) { - printf("Warning: footer size %zu != decompressed size %zu; treating as a nomal chunk\n", - footer_size, uncompressed_len); + LOG(WARNING) << "footer size " << footer_size << " != " << uncompressed_len + << "; treating as a normal chunk"; continue; } @@ -1345,12 +1352,12 @@ bool ImageModeImage::Initialize(const std::string& filename) { bool ImageModeImage::SetBonusData(const std::vector& bonus_data) { CHECK(is_source_); if (chunks_.size() < 2 || !chunks_[1].SetBonusData(bonus_data)) { - printf("Failed to set bonus data\n"); + LOG(ERROR) << "Failed to set bonus data"; DumpChunks(); return false; } - printf(" using %zu bytes of 
bonus data\n", bonus_data.size()); + LOG(INFO) << " using " << bonus_data.size() << " bytes of bonus data"; return true; } @@ -1362,14 +1369,14 @@ bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeI src_image->MergeAdjacentNormalChunks(); if (tgt_image->NumOfChunks() != src_image->NumOfChunks()) { - printf("source and target don't have same number of chunks!\n"); + LOG(ERROR) << "Source and target don't have same number of chunks!"; tgt_image->DumpChunks(); src_image->DumpChunks(); return false; } for (size_t i = 0; i < tgt_image->NumOfChunks(); ++i) { if ((*tgt_image)[i].GetType() != (*src_image)[i].GetType()) { - printf("source and target don't have same chunk structure! (chunk %zu)\n", i); + LOG(ERROR) << "Source and target don't have same chunk structure! (chunk " << i << ")"; tgt_image->DumpChunks(); src_image->DumpChunks(); return false; @@ -1390,8 +1397,8 @@ bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeI } else if (!tgt_chunk.ReconstructDeflateChunk()) { // We cannot recompress the data and get exactly the same bits as are in the input target // image, fall back to normal - printf("failed to reconstruct target deflate chunk %zu [%s]; treating as normal\n", i, - tgt_chunk.GetEntryName().c_str()); + LOG(WARNING) << "Failed to reconstruct target deflate chunk " << i << " [" + << tgt_chunk.GetEntryName() << "]; treating as normal"; tgt_chunk.ChangeDeflateChunkToNormal(); src_chunk.ChangeDeflateChunkToNormal(); } @@ -1403,7 +1410,7 @@ bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeI src_image->MergeAdjacentNormalChunks(); if (tgt_image->NumOfChunks() != src_image->NumOfChunks()) { // This shouldn't happen. 
- printf("merging normal chunks went awry\n"); + LOG(ERROR) << "Merging normal chunks went awry"; return false; } @@ -1415,7 +1422,7 @@ bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeI bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image, const std::string& patch_name) { - printf("Construct patches for %zu chunks...\n", tgt_image.NumOfChunks()); + LOG(INFO) << "Constructing patches for " << tgt_image.NumOfChunks() << " chunks..."; std::vector patch_chunks; patch_chunks.reserve(tgt_image.NumOfChunks()); @@ -1430,11 +1437,11 @@ bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, std::vector patch_data; if (!ImageChunk::MakePatch(tgt_chunk, src_chunk, &patch_data, nullptr)) { - printf("Failed to generate patch for target chunk %zu: ", i); + LOG(ERROR) << "Failed to generate patch for target chunk " << i; return false; } - printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data.size(), - tgt_chunk.GetRawDataLength()); + LOG(INFO) << "patch " << i << " is " << patch_data.size() << " bytes (of " + << tgt_chunk.GetRawDataLength() << ")"; if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { patch_chunks.emplace_back(tgt_chunk); @@ -1448,7 +1455,7 @@ bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, android::base::unique_fd patch_fd( open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); if (patch_fd == -1) { - printf("failed to open \"%s\": %s\n", patch_name.c_str(), strerror(errno)); + PLOG(ERROR) << "Failed to open " << patch_name; return false; } @@ -1456,6 +1463,7 @@ bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, } int imgdiff(int argc, const char** argv) { + bool verbose = false; bool zip_mode = false; std::vector bonus_data; size_t blocks_limit = 0; @@ -1464,9 +1472,10 @@ int imgdiff(int argc, const char** argv) { int opt; int option_index; - optind = 1; // Reset the getopt state so that we 
can call it multiple times for test. + optind = 0; // Reset the getopt state so that we can call it multiple times for test. - while ((opt = getopt_long(argc, const_cast(argv), "zb:", OPTIONS, &option_index)) != -1) { + while ((opt = getopt_long(argc, const_cast(argv), "zb:v", OPTIONS, &option_index)) != + -1) { switch (opt) { case 'z': zip_mode = true; @@ -1474,27 +1483,30 @@ int imgdiff(int argc, const char** argv) { case 'b': { android::base::unique_fd fd(open(optarg, O_RDONLY)); if (fd == -1) { - printf("failed to open bonus file %s: %s\n", optarg, strerror(errno)); + PLOG(ERROR) << "Failed to open bonus file " << optarg; return 1; } struct stat st; if (fstat(fd, &st) != 0) { - printf("failed to stat bonus file %s: %s\n", optarg, strerror(errno)); + PLOG(ERROR) << "Failed to stat bonus file " << optarg; return 1; } size_t bonus_size = st.st_size; bonus_data.resize(bonus_size); if (!android::base::ReadFully(fd, bonus_data.data(), bonus_size)) { - printf("failed to read bonus file %s: %s\n", optarg, strerror(errno)); + PLOG(ERROR) << "Failed to read bonus file " << optarg; return 1; } break; } + case 'v': + verbose = true; + break; case 0: { std::string name = OPTIONS[option_index].name; if (name == "block-limit" && !android::base::ParseUint(optarg, &blocks_limit)) { - printf("failed to parse size blocks_limit: %s\n", optarg); + LOG(ERROR) << "Failed to parse size blocks_limit: " << optarg; return 1; } else if (name == "split-info") { split_info_file = optarg; @@ -1504,22 +1516,28 @@ int imgdiff(int argc, const char** argv) { break; } default: - printf("unexpected opt: %s\n", optarg); + LOG(ERROR) << "unexpected opt: " << static_cast(opt); return 2; } } + if (!verbose) { + android::base::SetMinimumLogSeverity(android::base::WARNING); + } + if (argc - optind != 3) { - printf("usage: %s [options] \n", argv[0]); - printf( - " -z , Generate patches in zip mode, src and tgt should be zip files.\n" - " -b , Bonus file in addition to src, image mode only.\n" - " 
--block-limit, For large zips, split the src and tgt based on the block limit;\n" - " and generate patches between each pair of pieces. Concatenate these\n" - " patches together and output them into .\n" - " --split-info, Output the split information (patch_size, tgt_size, src_ranges);\n" - " zip mode with block-limit only.\n" - " --debug_dir, Debug directory to put the split srcs and patches, zip mode only.\n"); + LOG(ERROR) << "usage: " << argv[0] << " [options] "; + LOG(ERROR) + << " -z , Generate patches in zip mode, src and tgt should be zip files.\n" + " -b , Bonus file in addition to src, image mode only.\n" + " --block-limit, For large zips, split the src and tgt based on the block limit;\n" + " and generate patches between each pair of pieces. Concatenate " + "these\n" + " patches together and output them into .\n" + " --split-info, Output the split information (patch_size, tgt_size, src_ranges);\n" + " zip mode with block-limit only.\n" + " --debug_dir, Debug directory to put the split srcs and patches, zip mode only.\n" + " -v, --verbose, Enable verbose logging."; return 2; } @@ -1538,14 +1556,11 @@ int imgdiff(int argc, const char** argv) { return 1; } - // TODO save and output the split information so that caller can create split transfer lists - // accordingly. - // Compute bsdiff patches for each chunk's data (the uncompressed data, in the case of // deflate chunks). if (blocks_limit > 0) { if (split_info_file.empty()) { - printf("split-info path cannot be empty when generating patches with a block-limit.\n"); + LOG(ERROR) << "split-info path cannot be empty when generating patches with a block-limit"; return 1; } -- cgit v1.2.3 From 572abbb81cfa12cddf742fa35cd8a4b9eebdc7d1 Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Thu, 22 Feb 2018 15:40:39 -0800 Subject: Remove the assumption of target chunk size in imgdiff In the split mode of imgdiff, we used to assume that the size of a split target chunk is always greater than the blocksize i.e. 4096. 
This may lead to the following assertion failure: I0221 04:57:33.451323 818464 common.py:205 imgdiff F 02-21 04:57:33 821203 821203 imgdiff.cpp:999] Check failed: tgt_size >= BLOCK_SIZE (tgt_size=476, BLOCK_SIZE=4096) This CL removes the assumption and handles the edge cases. Test: generate and verify the incremental update for TFs in the bug; unit test passes Bug: 73757557 Bug: 73711365 Change-Id: Iadbb4ee658995f5856cd488f3793980881a59620 --- applypatch/imgdiff.cpp | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'applypatch/imgdiff.cpp') diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index 3dae63d4b..674cc2b16 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -955,14 +955,17 @@ bool ZipModeImage::SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, tgt->GetRawDataLength()); } } else { - ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, split_tgt_chunks, - split_src_chunks, split_tgt_images, - split_src_images); + bool added_image = ZipModeImage::AddSplitImageFromChunkList( + tgt_image, src_image, src_ranges, split_tgt_chunks, split_src_chunks, split_tgt_images, + split_src_images); split_tgt_chunks.clear(); split_src_chunks.clear(); - used_src_ranges.Insert(src_ranges); - split_src_ranges->push_back(std::move(src_ranges)); + // No need to update the split_src_ranges if we don't update the split source images. + if (added_image) { + used_src_ranges.Insert(src_ranges); + split_src_ranges->push_back(std::move(src_ranges)); + } src_ranges.Clear(); // We don't have enough space for the current chunk; start a new split image and handle @@ -973,9 +976,12 @@ bool ZipModeImage::SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, // TODO Trim it in case the CD exceeds limit too much. 
src_ranges.Insert(central_directory->GetStartOffset(), central_directory->DataLengthForPatch()); - ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, split_tgt_chunks, - split_src_chunks, split_tgt_images, split_src_images); - split_src_ranges->push_back(std::move(src_ranges)); + bool added_image = ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, + split_tgt_chunks, split_src_chunks, + split_tgt_images, split_src_images); + if (added_image) { + split_src_ranges->push_back(std::move(src_ranges)); + } ValidateSplitImages(*split_tgt_images, *split_src_images, *split_src_ranges, tgt_image.file_content_.size()); @@ -983,7 +989,7 @@ bool ZipModeImage::SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, return true; } -void ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, +bool ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, const ZipModeImage& src_image, const SortedRangeSet& split_src_ranges, const std::vector& split_tgt_chunks, @@ -991,12 +997,6 @@ void ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, std::vector* split_tgt_images, std::vector* split_src_images) { CHECK(!split_tgt_chunks.empty()); - // Target chunks should occupy at least one block. - // TODO put a warning and change the type to raw if it happens in extremely rare cases. - size_t tgt_size = split_tgt_chunks.back().GetStartOffset() + - split_tgt_chunks.back().DataLengthForPatch() - - split_tgt_chunks.front().GetStartOffset(); - CHECK_GE(tgt_size, BLOCK_SIZE); std::vector aligned_tgt_chunks; @@ -1015,7 +1015,12 @@ void ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, i++; } - CHECK_LT(i, split_tgt_chunks.size()); + + // Nothing left after alignment in the current split tgt chunks; skip adding the split_tgt_image. 
+ if (i == split_tgt_chunks.size()) { + return false; + } + aligned_tgt_chunks.insert(aligned_tgt_chunks.end(), split_tgt_chunks.begin() + i + 1, split_tgt_chunks.end()); CHECK(!aligned_tgt_chunks.empty()); @@ -1024,8 +1029,10 @@ void ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, size_t end_offset = aligned_tgt_chunks.back().GetStartOffset() + aligned_tgt_chunks.back().GetRawDataLength(); if (end_offset % BLOCK_SIZE != 0 && end_offset < tgt_image.file_content_.size()) { + size_t tail_block_length = std::min(tgt_image.file_content_.size() - end_offset, + BLOCK_SIZE - (end_offset % BLOCK_SIZE)); aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, end_offset, &tgt_image.file_content_, - BLOCK_SIZE - (end_offset % BLOCK_SIZE)); + tail_block_length); } ZipModeImage split_tgt_image(false); @@ -1049,6 +1056,8 @@ void ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, split_tgt_images->push_back(std::move(split_tgt_image)); split_src_images->push_back(std::move(split_src_image)); + + return true; } void ZipModeImage::ValidateSplitImages(const std::vector& split_tgt_images, @@ -1536,7 +1545,7 @@ int imgdiff(int argc, const char** argv) { " patches together and output them into .\n" " --split-info, Output the split information (patch_size, tgt_size, src_ranges);\n" " zip mode with block-limit only.\n" - " --debug_dir, Debug directory to put the split srcs and patches, zip mode only.\n" + " --debug-dir, Debug directory to put the split srcs and patches, zip mode only.\n" " -v, --verbose, Enable verbose logging."; return 2; } -- cgit v1.2.3