#!/usr/bin/env python3
# this file = selenium2.py
# Run it: ./selenium2.py "https://example.com"
# Based on HTML title tag being or not being "Just a moment...",
# not a 140-second wait or however long
#
## Bash:
#read -p "url: " url; ./selenium2.py $url | tail -n1 | xargs -d "\n" sh -c 'for args do path=$(echo "$args" | sed "s/.* to .//g" | sed "s/.$//g"); path2=$(echo "$path" | sed "s/^...............//g"); path3=$(echo $path2 | sed "s/\/[^\/]*$//g"); ssh user@10.0.0.2 "mkdir -p /path/to/$path3"; rsync -a --info=progress2 "$path" user@10.0.0.2:/path/to/$path2; done' _
## rsync -a --info=progress2 /src/path/ user@10.0.0.2:/path/to/memento/src/
## ssh user@10.0.0.2
## find /path/to/memento/src -maxdepth 1 -type d | tail -n+2 | xargs -d "\n" sh -c 'for args do echo -n "$args"; echo /memento/; done' _ | xargs -d "\n" sh -c 'for args do rsync -a --info=progress2 "$args" /path/to/memento/; done' _

import sys
import time
import re
import io
import os
import shutil
from datetime import datetime, timezone

from selenium import webdriver
from selenium.webdriver.common.proxy import *


def wait_for_title_change(driver, initial_title, timeout=300):
    """Poll ``driver.title`` until it differs from *initial_title*.

    Args:
        driver: Object exposing a ``title`` attribute (a Selenium WebDriver).
        initial_title: Title that means "still waiting".
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The new title (which may be an empty string) as soon as it differs
        from *initial_title*, or ``None`` if *timeout* seconds elapse first.
    """
    # A monotonic clock keeps the deadline correct even if the system
    # wall clock is adjusted while we are waiting.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        current_title = driver.title
        if current_title != initial_title:
            print(f"Title changed to: {current_title}")
            return current_title
        time.sleep(1)
    print("Timeout waiting for title to change.")
    return None


def _save_snapshot(page_source, url, urlsafe):
    """Write *page_source* to disk and copy it into the memento tree.

    Creates ``<timestamp>-<urlsafe>`` (the HTML), ``<timestamp>-<urlsafe>.txt``
    (the original URL) and
    ``<timestamp>/memento/<timestamp>/<url without scheme>/index.html``,
    all relative to the current working directory.
    """
    formatted_time = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    file_time_url = formatted_time + "-" + urlsafe

    with open(file_time_url, "w", encoding="utf-8") as w:
        w.write(page_source)
    with open(file_time_url + ".txt", "w", encoding="utf-8") as w:
        w.write(url + "\n")

    directory_path = (
        formatted_time + "/memento/" + formatted_time + "/"
        + re.sub(r"https?://", "", url)
    )
    os.makedirs(directory_path, exist_ok=True)
    destination_file = os.path.join(directory_path, "index.html")

    # The bash pipeline in the file header takes the last stdout line and
    # extracts the destination path from "... to '<path>'", so the wording
    # of these messages must not change.
    if not os.path.exists(destination_file):
        shutil.copy2(file_time_url, destination_file)
        print(f"Copied '{file_time_url}' to '{destination_file}'")
    else:
        print(f"'{destination_file}' already exists. No file copied.")


def main():
    """Open the URL given as ``sys.argv[1]`` and save the page once loaded.

    "Loaded" means the page title is no longer "Just a moment...". If the
    title never changes within the wait timeout, nothing is saved.
    """
    # Validate the command line before starting a browser, so a missing
    # argument does not leave a browser process behind.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} URL")
    url = sys.argv[1]
    urlsafe = re.sub(r'[:/?=@&(),+*%#]', '-', url)

    options = webdriver.ChromeOptions()
    options.binary_location = "/usr/bin/brave-browser"
    # NOTE(review): Selenium documents `accept_insecure_certs` on options
    # objects; `accept_untrusted_certs` may be a no-op here and the
    # command-line flag below may be what actually takes effect -- confirm.
    options.accept_untrusted_certs = True
    options.add_argument('--ignore-certificate-errors')

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # Wait for the title to change from "Just a moment..."
        new_title = wait_for_title_change(driver, "Just a moment...")
        # Compare against None explicitly: a page with an empty title is
        # still a successful load and must be saved.
        if new_title is not None:
            _save_snapshot(driver.page_source, url, urlsafe)
    finally:
        # Always close the browser, even if loading or saving failed.
        driver.quit()


if __name__ == "__main__":
    main()