Files
apitdn/link_validator.py
T

50 lines
1.6 KiB
Python
Raw Normal View History

import os
import glob
import re
def check_links(docs_dir="fluig_rag_docs"):
    """Validate local Markdown links under ``docs_dir`` and report broken ones.

    Every ``*.md`` file below ``docs_dir`` is scanned (recursively) for inline
    links of the form ``[text](destination)``. Destinations that point to
    another resource on disk are resolved relative to the file that contains
    them and checked with ``os.path.exists``. A summary is printed to stdout.

    Args:
        docs_dir: Root directory to scan. Defaults to ``"fluig_rag_docs"``,
            the directory that used to be hard-coded.

    Returns:
        A list with one dict per broken link, in scan order, with the keys
        ``"file"`` (path of the Markdown file), ``"link"`` (destination
        exactly as written) and ``"target"`` (absolute path that was
        checked). The list is empty when nothing is broken.
    """
    from urllib.parse import unquote

    # Sorted so the report is deterministic across runs and platforms.
    files = sorted(glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True))
    broken_links = []

    # Markdown inline link: [text](destination), allowing one level of
    # nested parentheses inside the destination. The inner class is matched
    # one character at a time on purpose: "[^()]+" nested inside "(...)*"
    # backtracks exponentially on an unterminated link such as "[x](aaaa((".
    link_pattern = re.compile(r"\[.*?\]\(((?:[^()]|\([^()]*\))*)\)")
    # Optional quoted title after the destination: [text](file.md "Title").
    title_pattern = re.compile(r"""\s+(?:"[^"]*"|'[^']*')\s*$""")

    print(f"Validando links em {len(files)} arquivos...")

    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        current_dir = os.path.dirname(file_path)

        for link in link_pattern.findall(content):
            # Reduce the raw capture to the bare destination.
            destination = title_pattern.sub("", link.strip())
            if destination.startswith("<") and destination.endswith(">"):
                destination = destination[1:-1]

            # Drop the anchor first so a ':' inside a fragment does not make
            # a local link look like an external one.
            clean_link = destination.split("#", 1)[0]
            if not clean_link:
                # Empty destination or pure in-page anchor.
                continue

            # Anything with a scheme (http:, https:, mailto:, ...) or a
            # protocol-relative URL is external and is not checked here.
            if ":" in clean_link or clean_link.startswith("//"):
                continue

            target_path = os.path.abspath(os.path.join(current_dir, clean_link))
            if os.path.exists(target_path):
                continue

            # Destinations may be percent-encoded (e.g. "my%20file.md").
            decoded_path = os.path.abspath(
                os.path.join(current_dir, unquote(clean_link))
            )
            if os.path.exists(decoded_path):
                continue

            broken_links.append({
                "file": file_path,
                "link": link,
                "target": target_path,
            })

    if broken_links:
        print(f"\nEncontrados {len(broken_links)} links quebrados:")
        for bl in broken_links:
            print(f"Arquivo: {bl['file']} -> Link: {bl['link']}")
    else:
        print("\nNenhum link quebrado encontrado! 🎉")

    return broken_links
# Script entry point: run the validation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    check_links()