#!/usr/bin/python3
"""Read an email message on stdin, extract every link from its text/plain
and text/html parts, and open each link headlessly in LibreWolf to capture
a screenshot."""

import email
import quopri
import re
import shutil
import subprocess
import sys
import tempfile

from bs4 import BeautifulSoup


def extract_urls_html(html_string):
    """Return the href of every <a> tag in an HTML document."""
    soup = BeautifulSoup(html_string, 'html.parser')
    return [a['href'] for a in soup.find_all('a', href=True)]


def extract_urls_text(input_string):
    """Return every http(s) URL found in a plain-text string."""
    return re.findall(r'https?://\S+', input_string)


def decode_quoted_printable(encoded_text):
    # errors='replace' keeps the script alive on payloads that are not
    # actually valid UTF-8.
    return quopri.decodestring(encoded_text).decode('utf-8', errors='replace')


def parse_mbox(mbox_text, contenttype):
    """Return the payload of the first MIME part matching contenttype,
    or an empty string if the message has no such part.

    Note: only quoted-printable transfer encoding is decoded here;
    base64-encoded parts are returned as-is.
    """
    mbox = email.message_from_string(mbox_text)
    for part in mbox.walk():
        if part.get_content_type() == contenttype:
            encoding = part.get('Content-Transfer-Encoding', '').lower()
            if encoding == 'quoted-printable':
                return decode_quoted_printable(part.get_payload())
            return part.get_payload()
    return ''


if __name__ == "__main__":
    mbox_text = sys.stdin.read()
    temp_dir = tempfile.mkdtemp(prefix="linkclicker")
    try:
        links = (extract_urls_text(parse_mbox(mbox_text, "text/plain"))
                 + extract_urls_html(parse_mbox(mbox_text, "text/html")))
        for link in links:
            # Each run overwrites screenshot.png in the throwaway profile
            # directory; "timeout 30s" kills the browser if a page hangs.
            cmd = ["timeout", "30s", "librewolf", "--headless",
                   "--profile", temp_dir, "--no-remote", "--new-instance",
                   "--screenshot", temp_dir + "/screenshot.png", "--", link]
            result = subprocess.run(cmd, capture_output=True, text=True)
            print(f"cmd: {cmd}\nstdout: {result.stdout}"
                  f"\nstderr: {result.stderr}\nreturncode: {result.returncode}")
    finally:
        # Remove the temporary profile even if a browser invocation fails.
        shutil.rmtree(temp_dir)
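
# ----------------------------------------------------------------------
# Usage sketch (assumptions: GNU coreutils "timeout" and a "librewolf"
# binary are on PATH, and the script is saved as ./linkclicker.py --
# neither the filename nor the binaries are verified here):
#
#   cat message.eml | ./linkclicker.py
#
# Quick interactive check of the extraction helpers (inputs are
# illustrative, not taken from a real message):
#
#   >>> extract_urls_text("see https://example.com/a for details")
#   ['https://example.com/a']
#   >>> extract_urls_html('<p><a href="https://example.com/b">b</a></p>')
#   ['https://example.com/b']
# ----------------------------------------------------------------------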