"""Minimal .torrent metadata reader: info-hashes, torrent type and file tree."""

from enum import Enum
from hashlib import sha1, sha256
from os import scandir
from re import search, IGNORECASE

from bencodepy import decode


class Type(Enum):
    """Flavour of torrent metadata as detected by ``Torrent.parse``."""

    # NOTE(review): the trailing commas make the first three values the
    # 1-tuples (0,), (1,) and (2,) while HYBRID is the plain int 3. Members
    # are only compared by identity in this module, so the values are left
    # untouched to avoid breaking anything that stored ``.value``.
    UNDEF = 0,
    V1 = 1,
    V2 = 2,
    HYBRID = 3


def _is_padding(entry):
    """Return True if a ``files`` list entry looks like a padding file.

    An entry counts as padding when its ``attr`` contains ``p``, when its
    joined path contains ``padding.file``, or when one of its path
    components is exactly ``.pad``.
    """
    attr = entry.get(b'attr')
    path = entry.get(b'path')
    return (
        (attr is not None and b'p' in attr)
        or b'padding.file' in b'/'.join(path)
        or b'.pad' in path
    )


def _insert_file(tree, components, length):
    """Store ``length`` in the nested dict ``tree`` under the given path."""
    node = tree
    for name in components[:-1]:
        node = node.setdefault(name, {})
    node[components[-1]] = length


def _file_tree_to_dict(names):
    """Convert a ``file tree`` dict into nested ``{name: length-or-dict}``.

    A node that has the empty key ``b''`` is a file; its ``length`` is
    returned in place of a dict.
    """
    result = {}
    for key in names:
        if key == b'':
            return names.get(key).get(b'length')
        result[key] = _file_tree_to_dict(names.get(key))
    return result


class Torrent():
    """Parsed view of one .torrent file.

    Attributes set by ``parse``:
      sha1 / sha256 -- digests of the raw bytes taken as the info dictionary
      dict          -- decoded metadata (``pieces``, ``files`` and
                       ``file tree`` are removed from its ``info`` entry)
      files         -- nested dict; a non-dict value is a file length and a
                       dict value is a directory
      type          -- a ``Type`` member
    """

    def __init__(self):
        self.sha1 = b''
        self.sha256 = b''
        self.dict = {}
        self.files = {}
        self.type = Type.UNDEF

    def file(self, f):
        """Read the file at path ``f`` and parse its contents."""
        with open(f, "rb") as handle:
            self.parse(handle.read())

    def parse(self, b):
        """Parse raw .torrent bytes ``b`` and fill in this object's fields."""
        # The info dictionary is taken as the bytes between the first
        # ``4:info`` key and the last ``6:sourced2:ip`` marker. When the
        # marker is absent rfind() returns -1, so the slice simply drops the
        # final byte of the input.
        # NOTE(review): presumably that byte is the 'e' closing the outer
        # dictionary, which is only right when ``info`` is the last
        # top-level key -- TODO confirm against the torrents this is used on.
        start = b.find(b'4:info') + 6
        end = b.rfind(b'6:sourced2:ip')
        infodict = b[start:end]
        self.sha1 = sha1(infodict).digest()
        self.sha256 = sha256(infodict).digest()

        self.dict = decode(b)
        info = self.dict.get(b'info')
        info.pop(b'pieces', None)

        if b'files' in info:
            self.type = Type.V1
            for entry in info.get(b'files'):
                if _is_padding(entry):
                    continue
                _insert_file(self.files, entry.get(b'path'),
                             entry.get(b'length'))
            info.pop(b'files')

        if b'file tree' in info:
            # some torrents have broken file trees so we use files first
            if self.type is Type.V1:
                self.type = Type.HYBRID
            else:
                # Only a file tree and no ``files`` list: a pure v2 torrent.
                self.type = Type.V2
                self.files = _file_tree_to_dict(info.get(b'file tree'))
            info.pop(b'file tree')

        if not self.files:
            # Neither listing produced anything: single-file layout with
            # ``name`` and ``length`` directly in the info dictionary.
            self.type = Type.V1
            self.files[info.get(b'name')] = info.get(b'length')

        first_filename = next(iter(self.files))
        if len(self.files) == 1 and self.files[first_filename] == {}:
            print("fixed bad single file torrent", self.sha1.hex())
            self.files[first_filename] = info.get(b'length')

    def paths(self):
        """Yield ``(components, length)`` for every file in ``self.files``.

        ``components`` is a fresh list of path components from the root of
        the file tree down to the file.
        """
        def walk(tree, prefix):
            for name, value in tree.items():
                here = prefix + [name]
                if isinstance(value, dict):
                    yield from walk(value, here)
                else:
                    yield here, value

        yield from walk(self.files, [])

    def matches(self, r):
        """Return True if ``r`` matches the torrent name or any file path.

        ``r`` is searched case-insensitively, first against ``info.name``
        and then against each file path joined with ``/``. Names are bytes,
        so ``r`` must be a bytes pattern.
        """
        info = self.dict.get(b'info') or {}
        name = info.get(b'name')
        if name is not None and search(r, name, IGNORECASE):
            return True
        for path, _size in self.paths():
            if search(r, b'/'.join(path), IGNORECASE):
                return True
        return False

    def matching_files(self, r):
        """Return the sub-tree of ``self.files`` whose names match ``r``.

        An entry whose own name matches is included with its full content.
        A directory containing matching descendants is included with only
        those descendants, which takes precedence over the full content.
        """
        def collect(tree):
            found = {}
            for name, content in tree.items():
                if search(r, name, IGNORECASE):
                    found[name] = content
                if isinstance(content, dict):
                    nested = collect(content)
                    if nested:
                        found[name] = nested
            return found

        return collect(self.files)

    def __repr__(self):
        return str(self.__dict__)

    def __hash__(self):
        # Hash on the SHA-1 digest once it is known; fall back to object
        # identity for an instance that has not been parsed yet.
        if len(self.sha1):
            return int.from_bytes(self.sha1, byteorder="big")
        return id(self)


def glob(d):
    """Parse every ``*.torrent`` file directly inside directory ``d``.

    Returns a dict mapping each torrent's SHA-1 digest to its ``Torrent``.
    """
    r = {}
    with scandir(d) as entries:
        for f in entries:
            if f.name.endswith(".torrent") and f.is_file():
                t = Torrent()
                t.file(f.path)
                r[t.sha1] = t
    return r