summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Luka Šijanec <anton@sijanec.eu>2023-02-21 15:18:10 +0100
committerAnton Luka Šijanec <anton@sijanec.eu>2023-02-21 15:18:10 +0100
commitbc053a9b414722990e9d62cce9f32963cfe43096 (patch)
treefb9c76e79c518ab47bb5b399ab64c53fdcf0e8e9
parentpie -> barh (diff)
downloadtravnik-bc053a9b414722990e9d62cce9f32963cfe43096.tar
travnik-bc053a9b414722990e9d62cce9f32963cfe43096.tar.gz
travnik-bc053a9b414722990e9d62cce9f32963cfe43096.tar.bz2
travnik-bc053a9b414722990e9d62cce9f32963cfe43096.tar.lz
travnik-bc053a9b414722990e9d62cce9f32963cfe43096.tar.xz
travnik-bc053a9b414722990e9d62cce9f32963cfe43096.tar.zst
travnik-bc053a9b414722990e9d62cce9f32963cfe43096.zip
-rw-r--r--.gitignore3
-rw-r--r--analiza/zvezek.ipynb94
-rw-r--r--templates/index.html37
-rw-r--r--travnik.py109
-rwxr-xr-xwww/app.py35
5 files changed, 188 insertions, 90 deletions
diff --git a/.gitignore b/.gitignore
index 90df181..effc8af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,6 @@ doc/
possible_torrents.L
*.torrent
.ipynb_checkpoints/
+J
+# anez, nek prazen file
+__pycache__/
diff --git a/analiza/zvezek.ipynb b/analiza/zvezek.ipynb
index 1b832e2..b88677b 100644
--- a/analiza/zvezek.ipynb
+++ b/analiza/zvezek.ipynb
@@ -15,101 +15,15 @@
"......\n",
"fixed bad single file torrent 4f269d8aefd647ee270842d53ec98aebd23a4afe\n",
"fixed bad single file torrent 7b09ae0b612dafc1744562dccbbe4becf4d633c3\n",
- "37769 @ 434.7589800900314 s\n"
+ "38021 @ 413.0262871221639 s\n"
]
}
],
"source": [
- "from bencodepy import decode\n",
- "from enum import Enum\n",
- "from hashlib import sha1, sha256\n",
- "from os import scandir\n",
"from time import monotonic\n",
- "class Type(Enum):\n",
- " UNDEF = 0,\n",
- " V1 = 1,\n",
- " V2 = 2,\n",
- " HYBRID = 3\n",
- "class Torrent():\n",
- " def __init__(self):\n",
- " self.sha1 = b''\n",
- " self.files = {}\n",
- " self.type = Type.UNDEF\n",
- " def file(self, f):\n",
- " self.parse(open(f, \"rb\").read())\n",
- " def parse(self, b):\n",
- " infodict = b[b.find(b'4:info')+6:b.rfind(b'6:sourced2:ip')]\n",
- " self.sha1 = sha1(infodict).digest()\n",
- " self.sha256 = sha256(infodict).digest()\n",
- " self.dict = decode(b)\n",
- " if b'pieces' in self.dict.get(b'info'):\n",
- " self.dict.get(b'info').pop(b'pieces')\n",
- " if b'files' in self.dict.get(b'info').keys():\n",
- " self.type = Type.V1\n",
- " for file in self.dict.get(b'info').get(b'files'):\n",
- " if file.get(b'attr') is not None and b'p' in file.get(b'attr') or b'padding.file' in b'/'.join(file.get(b'path')) or b'.pad' in file.get(b'path'):\n",
- " continue\n",
- " def insert_file(d, path, length, self):\n",
- " name = path.pop()\n",
- " if not len(path):\n",
- " d[name] = length\n",
- " return\n",
- " if name not in d.keys():\n",
- " d[name] = {}\n",
- " insert_file(d[name], path, length, self)\n",
- " file.get(b'path').reverse()\n",
- " insert_file(self.files, file.get(b'path'), file.get(b'length'), self)\n",
- " self.dict.get(b'info').pop(b'files')\n",
- " if b'file tree' in self.dict.get(b'info').keys(): # some torrents have broken file trees so we use files first\n",
- " if self.type is Type.V1:\n",
- " self.type = Type.HYBRID\n",
- " else:\n",
- " def filetree(names):\n",
- " r = {}\n",
- " for key in names.keys():\n",
- " if key == b'':\n",
- " return names.get(key).get(b'length')\n",
- " r[key] = filetree(names.get(key))\n",
- " return r\n",
- " self.files = filetree(self.dict.get(b'info').get(b'file tree'))\n",
- " self.dict.get(b'info').pop(b'file tree')\n",
- " if not len(self.files):\n",
- " self.type = Type.V1\n",
- " self.files[self.dict.get(b'info').get(b'name')] = self.dict.get(b'info').get(b'length')\n",
- " first_filename = [i for i in self.files.keys()][0]\n",
- " if len(self.files) == 1 and self.files[first_filename] == {}:\n",
- " print(\"fixed bad single file torrent\", self.sha1.hex())\n",
- " self.files[first_filename] = self.dict.get(b'info').get(b'length')\n",
- " def paths(self):\n",
- " def paths_r(d, path=None):\n",
- " if path is None:\n",
- " path = []\n",
- " for f in d.keys():\n",
- " if type(d[f]) is int:\n",
- " z = path.copy()\n",
- " z.append(f)\n",
- " yield z, d[f]\n",
- " else:\n",
- " z = path.copy()\n",
- " z.append(f)\n",
- " for z, v in paths_r(d[f], z):\n",
- " yield z, v\n",
- " for z, v in paths_r(self.files):\n",
- " yield z, v\n",
- " def __repr__(self):\n",
- " return str(self.__dict__)\n",
- " def __hash__(self):\n",
- " if len(self.sha1):\n",
- " return int.from_bytes(self.sha1, byteorder=\"big\")\n",
- " return id(self)\n",
- "def glob(d):\n",
- " r = {}\n",
- " for f in scandir(d):\n",
- " if f.name.endswith(\".torrent\") and f.is_file():\n",
- " t = Torrent()\n",
- " t.file(f.path)\n",
- " r[t.sha1] = t\n",
- " return r\n",
+ "from sys import path\n",
+ "path.append(\"/root/projects/travnik\")\n",
+ "from travnik import glob\n",
"print(\"......\")\n",
"start = monotonic()\n",
"torrents = glob(\"/root/projects/travnik\")\n",
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..b4f0804
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,37 @@
+<!DOCTYPE html>
+<html lang=sl>
+ <head>
+ <meta charset=UTF-8 />
+ <meta name=viewport content='width=device-width, initial-scale=1.0'>
+ <title>
+ travnik
+ </title>
+ <link rel=stylesheet href=//searc.šijanec.eu./css.css />
+ </head>
+ <body>
+ <h1>
+ travnik
+ </h1>
+ <form>
+ <label for=regex>
+ regex
+ </label>
+ <input name=regex id=regex placeholder=DVDRip value="{{ request.args.get("regex")|e }}" />
+ razvrsti
+ <select name=order>
+ <option {{ "selected" if request.args.get("order") == "naraščajoče" else "" }} value=naraščajoče>naraščajoče</option>
+ <option {{ "selected" if request.args.get("order") == "padajoče" else "" }} value=padajoče>padajoče</option>
+ </select>
+ po
+ <select name=sort>
+ <!-- <option {{ "selected" if request.args.get("sort") == "popularnosti" else "" }} value=popularnosti>popularnosti</option> --> <!-- N/I -->
+ <option {{ "selected" if request.args.get("sort") == "velikosti" else "" }} value=velikosti>velikosti</option>
+ <option {{ "selected" if request.args.get("sort") == "datumu" else "" }} value=datumu>datumu najdbe</option>
+ <option {{ "selected" if request.args.get("sort") == "datotekah" }} value=datotekah>številu datotek</option>
+ </select>
+ <input type=submit value=išči />
+ </form>
+ <hr>
+ <p><a href=//ni.šijanec.eu./sijanec/travnik>travnik</a> ima <b>{{ torrentov }}</b> {{ mno(torrentov, ["torrentov", "torrent", "torrenta", "torrente"]) }}. prižigal se je v <b>{{ zagontekst }}</b> v {{ roundstartuptime }} s in porablja {{ rammib }} MiB fizičnega pomnilnika.
+ </body>
+</html>
diff --git a/travnik.py b/travnik.py
new file mode 100644
index 0000000..20586f4
--- /dev/null
+++ b/travnik.py
@@ -0,0 +1,109 @@
+from bencodepy import decode
+from enum import Enum
+from hashlib import sha1, sha256
+from os import scandir
+from re import search, IGNORECASE
class Type(Enum):
    """Kind of metainfo detected by ``Torrent.parse``.

    ``UNDEF`` is the state before parsing.  ``V1`` is assigned when the info
    dictionary carries a ``files`` list (or describes a single file),
    ``HYBRID`` when it carries both ``files`` and ``file tree``.
    NOTE(review): ``V2`` presumably stands for a torrent that only has a
    ``file tree`` -- confirm against the parser.
    """

    # Plain integers: the previous trailing commas turned the first three
    # values into one-element tuples, so e.g. ``Type(1)`` raised ValueError.
    UNDEF = 0
    V1 = 1
    V2 = 2
    HYBRID = 3
class Torrent():
    """In-memory description of one torrent: info hashes, type and file tree.

    Attributes set by ``parse``:
        sha1, sha256: digests of the raw info-dictionary bytes (``b''``
            until something has been parsed).
        files: nested dict; keys are path components (bytes), values are
            either another dict (a directory) or the file length.
        type: the ``Type`` member detected while parsing.
        dict: the decoded metainfo, with ``pieces``, ``files`` and
            ``file tree`` removed from its info dictionary.
    """

    def __init__(self):
        self.sha1 = b''
        self.sha256 = b''
        self.files = {}
        self.type = Type.UNDEF
        self.dict = {}

    def file(self, f):
        """Read the torrent file at path ``f`` and parse its contents."""
        with open(f, "rb") as handle:
            self.parse(handle.read())

    def parse(self, b):
        """Parse bencoded metainfo bytes ``b`` and populate this object.

        Fills in ``sha1``, ``sha256``, ``dict``, ``type`` and ``files``.
        Errors from the bencode decoder or from a metainfo without an
        ``info`` dictionary are not caught here.
        """
        # The hashes are taken over the raw bytes between the ``4:info`` key
        # and the last ``6:sourced2:ip`` marker, not over a re-encoding.
        # NOTE(review): presumably that marker is a trailer written by the
        # tool that stored these files; when it is absent rfind() returns -1
        # and the slice only drops the final byte of ``b`` -- confirm that
        # this fallback is intended.
        infodict = b[b.find(b'4:info')+6:b.rfind(b'6:sourced2:ip')]
        self.sha1 = sha1(infodict).digest()
        self.sha256 = sha256(infodict).digest()
        self.dict = decode(b)
        info = self.dict.get(b'info')
        # The piece hashes are not kept on the object.
        if b'pieces' in info:
            info.pop(b'pieces')
        if b'files' in info:
            self.type = Type.V1
            for entry in info.get(b'files'):
                path = entry.get(b'path')
                if not path:
                    # An entry without path components cannot be placed
                    # anywhere in the tree.
                    continue
                if self._is_padding(entry.get(b'attr'), path):
                    continue
                self._insert_file(self.files, path, entry.get(b'length'))
            info.pop(b'files')
        # some torrents have broken file trees so we use files first
        if b'file tree' in info:
            if self.type is Type.V1:
                self.type = Type.HYBRID
            else:
                # Only a file tree is present; previously the type was left
                # at UNDEF in this branch.
                self.type = Type.V2
                self.files = self._file_tree(info.get(b'file tree'))
            info.pop(b'file tree')
        if not self.files:
            # Neither list produced anything: treat it as a single-file
            # torrent described by ``name``/``length``.
            self.type = Type.V1
            self.files[info.get(b'name')] = info.get(b'length')
        first_filename = next(iter(self.files))
        if len(self.files) == 1 and self.files[first_filename] == {}:
            # A lone, empty "directory": use the top-level length instead.
            print("fixed bad single file torrent", self.sha1.hex())
            self.files[first_filename] = info.get(b'length')

    @staticmethod
    def _is_padding(attr, path):
        """Return True when a ``files`` entry is recognised as padding."""
        if attr is not None and b'p' in attr:
            return True
        return b'padding.file' in b'/'.join(path) or b'.pad' in path

    @staticmethod
    def _insert_file(tree, path, length):
        """Store ``length`` in ``tree`` under the components of ``path``."""
        *directories, name = path
        node = tree
        for directory in directories:
            node = node.setdefault(directory, {})
        node[name] = length

    @staticmethod
    def _file_tree(names):
        """Convert a ``file tree`` dict into the nested ``files`` layout.

        A node that contains the empty key is a file; its ``length`` is
        returned in place of a dict.
        """
        converted = {}
        for key, value in names.items():
            if key == b'':
                return value.get(b'length')
            converted[key] = Torrent._file_tree(value)
        return converted

    def paths(self):
        """Yield ``(components, length)`` for every file in ``self.files``.

        ``components`` is a fresh list of path components from the root of
        the tree down to the file.
        """
        def walk(tree, prefix):
            for name, content in tree.items():
                here = prefix + [name]
                if isinstance(content, dict):
                    yield from walk(content, here)
                else:
                    yield here, content
        yield from walk(self.files, [])

    def matches(self, r):
        """Return True if regex ``r`` matches the torrent name or any path.

        The search is case-insensitive.  Names are bytes, so ``r`` has to
        be a bytes pattern.  Each file path is searched as its components
        joined with ``b'/'``.
        """
        info = self.dict.get(b'info') or {}
        name = info.get(b'name')
        if name is not None and search(r, name, IGNORECASE):
            return True
        # ``paths`` is a method and yields lists of components; both have to
        # be accounted for before handing anything to ``search``.
        for path, _size in self.paths():
            if search(r, b'/'.join(path), IGNORECASE):
                return True
        return False

    def matching_files(self, r):
        """Return the part of ``self.files`` whose names match regex ``r``.

        The result has the same nested layout as ``self.files``.  An entry
        whose own name matches is included as is; a directory with matching
        descendants is included reduced to those descendants (this takes
        precedence when both apply).  The search is case-insensitive and
        ``r`` has to be a bytes pattern.
        """
        def matching_files_r(directory):
            files = {}
            for name, content in directory.items():
                if search(r, name, IGNORECASE):
                    files[name] = content
                if isinstance(content, dict):
                    inner = matching_files_r(content)
                    if inner:
                        files[name] = inner
            return files
        return matching_files_r(self.files)

    def __repr__(self):
        return str(self.__dict__)

    def __hash__(self):
        # Hash by info hash once one is known, by identity before that.
        if len(self.sha1):
            return int.from_bytes(self.sha1, byteorder="big")
        return id(self)
def glob(d):
    """Parse every ``*.torrent`` regular file directly inside directory ``d``.

    Sub-directories are not descended into.  Returns a dict that maps each
    torrent's SHA-1 info hash (bytes) to its ``Torrent`` object; if two
    files share a hash, the one scanned later replaces the earlier one.
    """
    r = {}
    # The context manager closes the directory iterator even when parsing
    # one of the files raises.
    with scandir(d) as entries:
        for f in entries:
            if f.name.endswith(".torrent") and f.is_file():
                t = Torrent()
                t.file(f.path)
                r[t.sha1] = t
    return r
diff --git a/www/app.py b/www/app.py
new file mode 100755
index 0000000..1ba6af7
--- /dev/null
+++ b/www/app.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python
+from re import search, IGNORECASE
+from time import monotonic
+from flask import Flask, render_template, escape, request
+from sys import argv, path
+from os import getpid
+from psutil import Process
+from urllib.parse import quote
+from datetime import datetime
+from locale import setlocale, LC_ALL
+path.append(".")
+from travnik import glob, Type
# Flask application object.
# NOTE(review): presumably the import name "travnik" is chosen so that Flask
# resolves templates/ relative to the travnik module -- confirm.
app = Flask("travnik")
# Adopt the locale configured in the environment.
setlocale(LC_ALL, "")
# Start-up duration; -1 until it has been measured.
startuptime = -1
# Wall-clock time at which this process was started.
zagon = datetime.now()
def mno(quantity, types):
    """Pick the Slovenian noun form that agrees with ``quantity``.

    ``types`` is a sequence of four forms, indexed as follows:
        0 -- used for every other count (0, 5, 6, ... 100),
        1 -- used when ``quantity % 100 == 1``,
        2 -- used when ``quantity % 100 == 2``,
        3 -- used when ``quantity % 100`` is 3 or 4.

    Returns the selected element of ``types``.
    """
    remainder = quantity % 100
    # The forms live in ``types``; indexing ``quantity`` (an int) raised
    # TypeError for every count ending in 01, 02 or 03.
    if remainder == 1:
        return types[1]
    if remainder == 2:
        return types[2]
    # Slovenian uses the same form for 3 and 4.
    if remainder in (3, 4):
        return types[3]
    return types[0]
@app.route("/")
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    page = render_template("index.html")
    return page
if __name__ == "__main__":
    # Script entry point: build the index, publish the statistics shown by
    # the template as Jinja globals, then start the web server.
    print("zaganjam travnik", argv[0], "... zagon traja dolgo časa (~5 min za ~40k torrentov. za delovanje je potrebnih ~300 MiB RAM RES za ~40k torrentov. sharding je WIP.")
    start = monotonic()
    # NOTE(review): placeholder -- nothing is indexed yet, so the page always
    # reports zero torrents.
    torrents = {}
    # Replaces the module-level -1 sentinel with the measured duration.
    startuptime = monotonic() - start
    print("zagon uspešen. v", startuptime, "sem indeksiral", len(torrents), "torrentov")
    app.jinja_env.globals.update(
        mno=mno,
        zagontekst=zagon.strftime("%c"),
        torrentov=len(torrents),
        # index.html also reads the two values below; without them the
        # corresponding spots in the page rendered empty.
        roundstartuptime=round(startuptime),
        # Resident set size in MiB, sampled once here at start-up.
        rammib=Process(getpid()).memory_info().rss // 2**20,
    )
    app.jinja_env.add_extension('jinja2.ext.loopcontrols')
    # NOTE(review): debug=True together with host "::" exposes the
    # interactive debugger on every interface -- disable debug before
    # running this anywhere reachable by others.
    app.run(host="::", port=8080, debug=True)