"""
mod_notes_links.py v1.9
Julián
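Manage links in the website pages generated from Markdown files:
- Scan every index.html file of the website
- Search for the attached-file pattern
- Make the file path relative (prefix it with "../")
- Replace the alt text with the file name
- Replace the image tag with a classic link tag for non-image files
- Search for the external-link pattern
- Update external links to open in a new tab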
"""
import os
import re
from datetime import datetime
from urllib.parse import urlsplit
# Custom variables
# Root directory walked recursively; every index.html found below it is processed.
WEBSITE_PAGES = "/var/www/<website_name>/public/posts"
# Host suffixes treated as internal: links to these hosts are left untouched.
DOMAIN = ('<ip_address>', '<domain_name>')
# DEBUG turns tracing on; DEBUG_FILE / DEBUG_LINK select which kind of match is
# traced and only take effect while DEBUG is True.
DEBUG = False
# DEBUG = True
DEBUG_FILE = True
DEBUG_LINK = True
# TEST = True is a dry run: pages are analysed but never written back.
TEST = False
# TEST = True

# Variables
# Paragraph-leading <img> tag; captures the optional alt attribute, the file
# name (up to its first dot) and the extension (from that dot to the quote).
PATTERN_FILE = re.compile(r'<p><img\s+(?P<alt>alt=".*?"\s+)?src=".*\/(?P<filename>.*?)(?P<extension>\..*?)">')
# Line-level trigger: the line holds an absolute http(s) link closed on the same line.
PATTERN_LINK = re.compile(r'<a href="https?:\/\/(?P<domain>.*?)"+?\s*(?P<target>target=".*")?>.*?<\/a>')
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp')
# Opening of a paragraph-leading <img> tag up to (and including) src=", with or
# without an alt attribute in front of src.
_IMG_OPEN = re.compile(r'<p><img\s+(?:alt="[^"]*"\s+)?src="')
# One absolute http(s) anchor opening tag: `head` is everything up to the
# closing quote of href, `attrs` is whatever follows inside the tag.
_ANCHOR_OPEN = re.compile(r'(?P<head><a\s+href="(?P<url>https?://[^"]*)")(?P<attrs>[^>]*)>')
# A real target attribute (not data-target and the like).
_HAS_TARGET = re.compile(r'(?:^|\s)target\s*=', re.IGNORECASE)


def _log(*parts):
    """Print *parts* prefixed with the current local timestamp."""
    print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}]", *parts)


def relink_attachment(line: str, match=None) -> str:
    """Return *line* with its attached file rewritten, or *line* unchanged.

    *match* is the result of ``PATTERN_FILE.search(line)``; it is computed
    here when omitted.  On a match:

    - the first ``src="`` gets a ``../`` prefix;
    - the alt text, when present, is replaced by the file name;
    - for a non-image extension the ``<p><img ... src="`` opening becomes
      ``<a href="`` and ``"></p>`` becomes ``">name.ext</a>``.
    """
    if match is None:
        match = PATTERN_FILE.search(line)
    if not match:
        return line

    filename = match.group("filename")
    extension = match.group("extension")

    # Set relative path
    # NOTE(review): a rewritten <img> still matches PATTERN_FILE, so running
    # the script twice over the same page prepends "../" twice -- presumably
    # the pages are regenerated before every run; confirm.
    new_line = line.replace('src="', 'src="../', 1)

    # Change alt text by filename
    alt = match.group("alt")
    if alt:
        new_line = new_line.replace(alt, f'alt="{filename}" ', 1)

    # Replace image tag for classic file tag
    if not extension.lower().endswith(IMAGE_EXTENSIONS):
        # A regex (not a literal '<p><img src="') so the opening tag is also
        # replaced when an alt attribute precedes src; otherwise the result
        # would be an <img> followed by a dangling </a>.
        new_line = _IMG_OPEN.sub('<a href="', new_line, count=1)
        new_line = new_line.replace('"></p>', f'">{filename}{extension}</a>', 1)
    return new_line


def is_external_url(url: str, domains=DOMAIN) -> bool:
    """Return True when the host of *url* ends with none of *domains*.

    Only the host is compared, so the path, query string and port of the URL
    cannot influence the result.  *domains* is a string or a tuple of strings.
    """
    try:
        host = urlsplit(url).hostname or ""
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): not ours.
        return True
    return not host.endswith(domains)


def open_external_links_in_tab(line: str, domains=DOMAIN) -> str:
    """Return *line* with ``target="_blank"`` added to its external links.

    Every absolute http(s) anchor on the line is judged on its own: anchors
    pointing at one of *domains* and anchors that already carry a target
    attribute are returned untouched.
    """
    def add_target(anchor):
        if _HAS_TARGET.search(anchor.group("attrs")):
            return anchor.group(0)
        if not is_external_url(anchor.group("url"), domains):
            return anchor.group(0)
        return f'{anchor.group("head")} target="_blank"{anchor.group("attrs")}>'

    return _ANCHOR_OPEN.sub(add_target, line)


def process_line(line: str, filepath: str = "") -> str:
    """Return the rewritten version of one page line.

    A line matching PATTERN_FILE gets the attachment treatment only; otherwise
    a line matching PATTERN_LINK gets the external-link treatment; any other
    line is returned as is.  *filepath* is used for debug output only.
    """
    # Step 1: File match
    match_file = PATTERN_FILE.search(line)
    if match_file:
        new_line = relink_attachment(line, match_file)
        if DEBUG and DEBUG_FILE:
            _log("---", filepath)
            _log("------ MATCH FILE")
            _log("old_line:", line)
            _log("alt:", match_file.group("alt"))
            _log("filename:", match_file.group("filename"))
            _log("extension:", match_file.group("extension"))
            _log("new_line:", new_line)
        return new_line

    # Step 2: Link match
    match_link = PATTERN_LINK.search(line)
    if match_link:
        new_line = open_external_links_in_tab(line)
        if DEBUG and DEBUG_LINK:
            _log("---", filepath)
            _log("------ MATCH LINK")
            _log("old_line:", line)
            _log("domain:", match_link.group("domain"))
            _log("target:", match_link.group("target"))
            _log("new_line:", new_line)
        return new_line

    return line


def process_file(filepath: str) -> bool:
    """Rewrite the page at *filepath* in place; return True if any line changed.

    The page is written back only when at least one line actually changed and
    TEST is off, so untouched pages keep their content and modification time.
    """
    new_file = []
    modified = False
    with open(filepath, "r", encoding="utf-8") as file:
        for line in file:
            new_line = process_line(line, filepath)
            if new_line != line:
                modified = True
            new_file.append(new_line)

    # Write back new file if something changed
    if modified and not TEST:
        with open(filepath, "w", encoding="utf-8") as file:
            file.writelines(new_file)
    return modified


# Print header
_log(">>> mod_notes_links.py")
_log("DEBUG=True" if DEBUG else "DEBUG=False")
_log("TEST=True" if TEST else "TEST=False")

# Scan every website index.html files
for dirpath, _dirnames, filenames in os.walk(WEBSITE_PAGES):
    if "index.html" in filenames:
        process_file(os.path.join(dirpath, "index.html"))