summaryrefslogtreecommitdiffstats
path: root/skripti/emailautolinkclicker.py
blob: f9d200b6030427730ff6305db22dd8973356cdfc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/python3
import sys
import email
import quopri
import re
from bs4 import BeautifulSoup
import tempfile
import shutil
import subprocess
def extract_urls_html(html_string):
	"""Collect the href attribute of every <a> tag found in html_string.

	The markup is parsed with BeautifulSoup using the 'html.parser' backend.
	Only anchors that actually carry an href attribute are considered, and
	the attribute values are returned as-is (no filtering by URL scheme).

	Returns a list of href strings, in the order find_all() yields the tags.
	"""
	document = BeautifulSoup(html_string, 'html.parser')
	hrefs = []
	for anchor in document.find_all('a', href=True):
		hrefs.append(anchor['href'])
	return hrefs
def extract_urls_text(input_string):
	"""Return every http:// or https:// URL found in input_string.

	A URL is taken to be the scheme followed by the longest run of
	non-whitespace characters, so trailing punctuation or brackets that touch
	the URL are included in the match.

	Returns a list of matched strings in order of appearance (empty when
	nothing matches).
	"""
	return [found.group(0) for found in re.finditer(r'https?://\S+', input_string)]
def decode_quoted_printable(encoded_text):
	"""Decode quoted-printable encoded_text and return it as a str.

	The quoted-printable layer is removed with quopri.decodestring, and the
	resulting bytes are then interpreted as UTF-8. Bytes that are not valid
	UTF-8 raise UnicodeDecodeError.
	"""
	raw_bytes = quopri.decodestring(encoded_text)
	return str(raw_bytes, 'utf-8')
def parse_mbox(mbox_text, contenttype):
	"""Return the text of the part of an e-mail whose MIME type is contenttype.

	mbox_text is the raw message source as a str; it is parsed with
	email.message_from_string and every part is visited via walk().

	Parts whose Content-Transfer-Encoding is quoted-printable or base64 are
	decoded and converted to str using the charset declared on the part
	(UTF-8 when none is declared or the declared one is unknown to Python).
	Bytes that cannot be decoded are replaced instead of raising. Parts with
	any other transfer encoding are returned exactly as stored in the message.

	If several parts match, the last one wins. If no part matches, an empty
	string is returned instead of failing, so a message without e.g. a
	text/html alternative simply yields no content.
	"""
	message = email.message_from_string(mbox_text)
	content = ""
	for part in message.walk():
		if part.get_content_type() != contenttype:
			continue
		# The header value may be a Header object and may carry stray whitespace.
		transfer_encoding = str(part.get('Content-Transfer-Encoding', '')).strip().lower()
		if transfer_encoding not in ('quoted-printable', 'base64'):
			content = part.get_payload()
			continue
		raw = part.get_payload(decode=True)
		if raw is None:
			# get_payload(decode=True) yields None for multipart containers:
			# there is no body of their own to decode.
			continue
		charset = part.get_content_charset() or 'utf-8'
		try:
			content = raw.decode(charset, 'replace')
		except LookupError:
			# The declared charset name is not a codec Python knows.
			content = raw.decode('utf-8', 'replace')
	return content
if __name__ == "__main__":
	# Read one raw e-mail from stdin, gather every link from its text/plain
	# and text/html parts, and open each link once in a headless LibreWolf
	# instance, printing the command and its result.
	mbox_text = sys.stdin.read()
	# Extract the links before creating the profile directory, so a parsing
	# failure cannot leave a directory behind.
	links = extract_urls_text(parse_mbox(mbox_text, "text/plain")) + extract_urls_html(parse_mbox(mbox_text, "text/html"))
	# The context manager removes the throwaway profile directory even when
	# launching the browser raises (e.g. the timeout binary is missing).
	with tempfile.TemporaryDirectory(suffix="linkclicker") as temp_dir:
		for link in links:
			# "timeout 30s" bounds each browser run; "--" stops option parsing
			# so a link starting with "-" is not taken as a command-line flag.
			cmd = ["timeout", "30s", "librewolf", "--headless", "--profile", temp_dir, "--no-remote", "--new-instance", "--screenshot", temp_dir + "/screenshot.png", "--", link]
			result = subprocess.run(cmd, capture_output=True, text=True)
			print(f"cmd: {cmd}\nstdout: {result.stdout}\nstderr: {result.stderr}\nreturncode: {result.returncode}")