50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
|
|
import os
|
||
|
|
import glob
|
||
|
|
import re
|
||
|
|
|
||
|
|
def check_links(docs_dir="fluig_rag_docs"):
    """Report Markdown links under ``docs_dir`` that point to missing local files.

    Every ``*.md`` file below ``docs_dir`` (searched recursively) is read as
    UTF-8 and scanned for inline links of the form ``[text](destination)``.
    Destinations that are pure anchors (``#section``), protocol-relative
    (``//host/path``) or contain a ``:`` (``https:``, ``mailto:``, ...) are
    skipped. Every other destination is resolved relative to the directory of
    the file that contains it and checked for existence on disk.

    Args:
        docs_dir: Root directory to scan. Defaults to ``"fluig_rag_docs"``.

    Returns:
        A list with one dict per broken link, in file order, with the keys
        ``"file"`` (path of the Markdown file), ``"link"`` (the raw text found
        between the parentheses) and ``"target"`` (the absolute path that was
        looked up). The list is empty when no broken link is found.

    A summary is also printed to standard output.
    """
    # Function-scope import: keeps this block independent of the import header.
    from urllib.parse import unquote

    # Sorted so the report order is the same on every run and platform.
    files = sorted(glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True))
    broken_links = []

    # Inline Markdown links: [text](destination).
    # Supports nested parentheses in the destination (one level deep).
    link_pattern = re.compile(r"\[.*?\]\(((?:[^()]+|\([^()]*\))*)\)")

    # Splits "destination" from an optional trailing title, e.g.
    # `file.md "Title"`, `file.md 'Title'` or `file.md (Title)`.
    destination_pattern = re.compile(
        r"""(<[^>]*>|\S+)(?:\s+(?:"[^"]*"|'[^']*'|\([^)]*\)))?"""
    )

    print(f"Validando links em {len(files)} arquivos...")

    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        current_dir = os.path.dirname(file_path)

        for link in link_pattern.findall(content):
            destination = link.strip()

            # Drop an optional link title; if the text does not look like
            # "destination [title]" (e.g. an unescaped path with spaces),
            # keep it whole so it is still checked as a path.
            match = destination_pattern.fullmatch(destination)
            if match:
                destination = match.group(1)

            # <...> is Markdown's way of writing destinations with spaces.
            if destination.startswith("<") and destination.endswith(">"):
                destination = destination[1:-1]

            # Skip pure anchors and external links. Any URL scheme contains
            # ":"; a bare "http" prefix is deliberately NOT tested, because it
            # would also skip local files such as "http-api.md".
            if destination.startswith(("#", "//")) or ":" in destination:
                continue

            # Strip the anchor from the local link.
            clean_link = destination.split("#")[0]
            if not clean_link:
                continue

            # Accept both the literal path and its percent-decoded form
            # ("my%20file.md" -> "my file.md").
            candidates = [clean_link]
            decoded_link = unquote(clean_link)
            if decoded_link != clean_link:
                candidates.append(decoded_link)

            targets = [
                os.path.abspath(os.path.join(current_dir, candidate))
                for candidate in candidates
            ]

            if not any(os.path.exists(target) for target in targets):
                broken_links.append({
                    "file": file_path,
                    "link": link,
                    "target": targets[-1],
                })

    if broken_links:
        print(f"\nEncontrados {len(broken_links)} links quebrados:")
        for bl in broken_links:
            print(f"Arquivo: {bl['file']} -> Link: {bl['link']}")
    else:
        print("\nNenhum link quebrado encontrado! 🎉")

    return broken_links
|
||
|
|
|
||
|
|
# Script entry point: run the link check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    check_links()
|