"""Validate local Markdown links under a documentation directory."""

import glob
import os
import re
from urllib.parse import unquote

# Matches Markdown links/images ``[text](target)`` and captures ``target``.
# One level of nested parentheses is supported inside the target.  Each loop
# iteration consumes exactly one non-parenthesis character (there is no inner
# ``+``): nesting ``[^()]+`` inside ``(...)*`` makes the regex backtrack
# exponentially on targets with unbalanced or doubly nested parentheses.
_LINK_PATTERN = re.compile(r"\[.*?\]\(((?:[^()]|\([^()]*\))*)\)")

# A URI scheme prefix such as ``https:`` or ``mailto:`` (RFC 3986 syntax).
_SCHEME_PATTERN = re.compile(r"[A-Za-z][A-Za-z0-9+.\-]*:")

# A link destination followed by an optional quoted title: ``file.md "Title"``.
_TITLE_PATTERN = re.compile(r"""^(.*?)\s+(?:"[^"]*"|'[^']*')$""", re.DOTALL)


def _local_path(link):
    """Return the filesystem part of *link*, or ``None`` if it is not local.

    External links (anything starting with a URI scheme or ``//``), pure
    anchors and empty destinations yield ``None``.  A trailing quoted title,
    surrounding ``<...>`` and the ``#fragment`` are removed.
    """
    destination = link.strip()

    titled = _TITLE_PATTERN.match(destination)
    if titled:
        destination = titled.group(1).strip()

    if destination.startswith("<") and destination.endswith(">"):
        destination = destination[1:-1]

    # Test for a scheme *prefix* rather than ``startswith("http")`` or a colon
    # anywhere, so files such as ``http-guide.md`` and anchors containing a
    # colon are still validated.
    if destination.startswith(("#", "//")) or _SCHEME_PATTERN.match(destination):
        return None

    path = destination.partition("#")[0]
    return path or None


def check_links(docs_dir="fluig_rag_docs"):
    """Report local Markdown links that point to non-existent files.

    Every ``*.md`` file below *docs_dir* is scanned (recursively).  Each local
    link is resolved relative to the directory of the file that contains it.
    A summary is printed to standard output.

    Args:
        docs_dir: Root directory of the Markdown documentation.

    Returns:
        A list with one dict per broken link, holding the keys ``"file"``
        (the scanned file), ``"link"`` (the raw link text as written) and
        ``"target"`` (the absolute path that was looked up).
    """
    # Sorted so the report is deterministic across platforms and runs.
    files = sorted(glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True))
    broken_links = []

    print(f"Validando links em {len(files)} arquivos...")

    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        current_dir = os.path.dirname(file_path)

        for link in _LINK_PATTERN.findall(content):
            local_path = _local_path(link)
            if local_path is None:
                continue

            # Accept the path both as written and percent-decoded, so that
            # ``my%20file.md`` finds ``my file.md`` on disk.
            candidates = [
                os.path.abspath(os.path.join(current_dir, candidate))
                for candidate in (local_path, unquote(local_path))
            ]
            if any(os.path.exists(candidate) for candidate in candidates):
                continue

            broken_links.append({
                "file": file_path,
                "link": link,
                "target": candidates[0],
            })

    if broken_links:
        print(f"\nEncontrados {len(broken_links)} links quebrados:")
        for bl in broken_links:
            print(f"Arquivo: {bl['file']} -> Link: {bl['link']}")
    else:
        print("\nNenhum link quebrado encontrado! 🎉")

    return broken_links


if __name__ == "__main__":
    check_links()