applypatch/include/applypatch/imgdiff_image.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _APPLYPATCH_IMGDIFF_IMAGE_H
#define _APPLYPATCH_IMGDIFF_IMAGE_H

#include <stddef.h>
#include <stdio.h>
#include <sys/types.h>

#include <string>
#include <vector>

#include <bsdiff.h>
#include <ziparchive/zip_archive.h>
#include <zlib.h>

#include "imgdiff.h"
#include "rangeset.h"

class ImageChunk {
 public:
  static constexpr auto WINDOWBITS = -15;  // 32kb window; negative to indicate a raw stream.
  static constexpr auto MEMLEVEL = 8;      // the default value.
  static constexpr auto METHOD = Z_DEFLATED;
  static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY;

  ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len,
             std::string entry_name = {});

  int GetType() const {
    return type_;
  }
  size_t GetRawDataLength() const {
    return raw_data_len_;
  }
  const std::string& GetEntryName() const {
    return entry_name_;
  }
  size_t GetStartOffset() const {
    return start_;
  }
  int GetCompressLevel() const {
    return compress_level_;
  }

  // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return
  // the raw data.
  const uint8_t* DataForPatch() const;
  size_t DataLengthForPatch() const;

  void Dump() const {
    printf("type: %d, start: %zu, len: %zu, name: %s\n", type_, start_, DataLengthForPatch(),
           entry_name_.c_str());
  }

  void SetUncompressedData(std::vector<uint8_t> data);
  bool SetBonusData(const std::vector<uint8_t>& bonus_data);

  bool operator==(const ImageChunk& other) const;
  bool operator!=(const ImageChunk& other) const {
    return !(*this == other);
  }

  /*
   * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data).
   * The resulting patch will likely be about as big as the target file, but it lets us handle
   * the case of images where some gzip chunks are reconstructible but others aren't (by treating
   * the ones that aren't as normal chunks).
   */
  void ChangeDeflateChunkToNormal();

  /*
   * Verify that we can reproduce exactly the same compressed data that we started with.  Sets the
   * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding
   * parameters needed to produce the right output.
   */
  bool ReconstructDeflateChunk();
  bool IsAdjacentNormal(const ImageChunk& other) const;
  void MergeAdjacentNormal(const ImageChunk& other);

  /*
   * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data.
   * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used
   * repeatedly, pass nullptr if not needed.
   */
  static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src,
                        std::vector<uint8_t>* patch_data, saidx_t** bsdiff_cache);

 private:
  const uint8_t* GetRawData() const;
  bool TryReconstruction(int level);

  int type_;                                    // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW
  size_t start_;                                // offset of chunk in the original input file
  const std::vector<uint8_t>* input_file_ptr_;  // ptr to the full content of original input file
  size_t raw_data_len_;

  // deflate encoder parameters
  int compress_level_;

  // --- for CHUNK_DEFLATE chunks only: ---
  std::vector<uint8_t> uncompressed_data_;
  std::string entry_name_;  // used for zip entries
};

// PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track
// of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length).
class PatchChunk {
 public:
  PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector<uint8_t> data);

  // Construct a CHUNK_RAW patch from the target data directly.
  explicit PatchChunk(const ImageChunk& tgt);

  // Return true if raw data size is smaller than the patch size.
  static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size);

  // Update the source start with the new offset within the source range.
  void UpdateSourceOffset(const SortedRangeSet& src_range);

  static bool WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd);

 private:
  size_t GetHeaderSize() const;
  size_t WriteHeaderToFd(int fd, size_t offset) const;

  // The patch chunk type is the same as the target chunk type. The only exception is we change
  // the |type_| to CHUNK_RAW if target length is smaller than the patch size.
  int type_;

  size_t source_start_;
  size_t source_len_;
  size_t source_uncompressed_len_;

  size_t target_start_;  // offset of the target chunk within the target file
  size_t target_len_;
  size_t target_uncompressed_len_;
  size_t target_compress_level_;  // the deflate compression level of the target chunk.

  std::vector<uint8_t> data_;  // storage for the patch data
};

// Interface for zip_mode and image_mode images. We initialize the image from an input file and
// split the file content into a list of image chunks.
class Image {
 public:
  explicit Image(bool is_source) : is_source_(is_source) {}

  virtual ~Image() {}

  // Create a list of image chunks from input file.
  virtual bool Initialize(const std::string& filename) = 0;

  // Look for runs of adjacent normal chunks and compress them down into a single chunk.  (Such
  // runs can be produced when deflate chunks are changed to normal chunks.)
  void MergeAdjacentNormalChunks();

  void DumpChunks() const;

  // Non const iterators to access the stored ImageChunks.
  std::vector<ImageChunk>::iterator begin() {
    return chunks_.begin();
  }

  std::vector<ImageChunk>::iterator end() {
    return chunks_.end();
  }

  std::vector<ImageChunk>::const_iterator cbegin() const {
    return chunks_.cbegin();
  }

  std::vector<ImageChunk>::const_iterator cend() const {
    return chunks_.cend();
  }

  ImageChunk& operator[](size_t i);
  const ImageChunk& operator[](size_t i) const;

  size_t NumOfChunks() const {
    return chunks_.size();
  }

 protected:
  bool ReadFile(const std::string& filename, std::vector<uint8_t>* file_content);

  bool is_source_;                     // True if it's for source chunks.
  std::vector<ImageChunk> chunks_;     // Internal storage of ImageChunk.
  std::vector<uint8_t> file_content_;  // Store the whole input file in memory.
};

class ZipModeImage : public Image {
 public:
  explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {}

  bool Initialize(const std::string& filename) override;

  // Initialize a dummy ZipModeImage from an existing ImageChunk vector. For src img pieces, we
  // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img
  // pieces; because for each chunk both the data and their offset within the file are unchanged.
  void Initialize(const std::vector<ImageChunk>& chunks, const std::vector<uint8_t>& file_content) {
    chunks_ = chunks;
    file_content_ = file_content;
  }

  // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in
  // fact the whole source file.
  ImageChunk PseudoSource() const;

  // Find the matching deflate source chunk by entry name. Search for normal chunks also if
  // |find_normal| is true.
  ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false);

  const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const;

  // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if
  // src and tgt are identical.
  static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image);

  // Compute the patch between tgt & src images, and write the data into |patch_name|.
  static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image,
                              const std::string& patch_name);

  // Compute the patch based on the lists of split src and tgt images. Generate patches for each
  // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write
  // each split src data and patch data into that directory.
  static bool GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images,
                              const std::vector<ZipModeImage>& split_src_images,
                              const std::vector<SortedRangeSet>& split_src_ranges,
                              const std::string& patch_name, const std::string& debug_dir);

  // Split the tgt chunks and src chunks based on the size limit.
  static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image,
                                         const ZipModeImage& src_image,
                                         std::vector<ZipModeImage>* split_tgt_images,
                                         std::vector<ZipModeImage>* split_src_images,
                                         std::vector<SortedRangeSet>* split_src_ranges);

 private:
  // Initialize image chunks based on the zip entries.
  bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle);
  // Add the a zip entry to the list.
  bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry);
  // Return the real size of the zip file. (omit the trailing zeros that used for alignment)
  bool GetZipFileSize(size_t* input_file_size);

  static void ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images,
                                  const std::vector<ZipModeImage>& split_src_images,
                                  std::vector<SortedRangeSet>& split_src_ranges,
                                  size_t total_tgt_size);
  // Construct the dummy split images based on the chunks info and source ranges; and move them into
  // the given vectors.
  static void AddSplitImageFromChunkList(const ZipModeImage& tgt_image,
                                         const ZipModeImage& src_image,
                                         const SortedRangeSet& split_src_ranges,
                                         const std::vector<ImageChunk>& split_tgt_chunks,
                                         const std::vector<ImageChunk>& split_src_chunks,
                                         std::vector<ZipModeImage>* split_tgt_images,
                                         std::vector<ZipModeImage>* split_src_images);

  // Function that actually iterates the tgt_chunks and makes patches.
  static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image,
                                      std::vector<PatchChunk>* patch_chunks);

  // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit,
  // we'll split that entry into several smaller chunks in advance.
  size_t limit_;
};

class ImageModeImage : public Image {
 public:
  explicit ImageModeImage(bool is_source) : Image(is_source) {}

  // Initialize the image chunks list by searching the magic numbers in an image file.
  bool Initialize(const std::string& filename) override;

  bool SetBonusData(const std::vector<uint8_t>& bonus_data);

  // In Image Mode, verify that the source and target images have the same chunk structure (ie, the
  // same sequence of deflate and normal chunks).
  static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image);

  // In image mode, generate patches against the given source chunks and bonus_data; write the
  // result to |patch_name|.
  static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image,
                              const std::string& patch_name);
};

#endif  // _APPLYPATCH_IMGDIFF_IMAGE_H