feat: upgrade to async extractor, add RAG processing, link healing and Docker support

This commit is contained in:
rodolpho
2026-05-07 18:43:43 -03:00
parent 68dc35abbd
commit 570292d8a9
116 changed files with 16277 additions and 388 deletions
+49
View File
@@ -0,0 +1,49 @@
import os
import glob
import re
def check_links(docs_dir="fluig_rag_docs"):
    """Validate relative markdown links in all .md files under *docs_dir*.

    Recursively collects markdown files, extracts ``[text](target)`` links,
    skips external links (``http...`` or anything containing a scheme colon)
    and pure anchors, and reports local targets that do not exist on disk.

    Args:
        docs_dir: Root directory to scan. Defaults to ``"fluig_rag_docs"``,
            preserving the original hard-coded behavior.

    Returns:
        list[dict]: One dict per broken link with keys ``"file"``,
        ``"link"`` and ``"target"``; empty when every link resolves.
    """
    files = glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True)
    broken_links = []
    # Markdown link pattern: [text](link.md).
    # Supports nested parentheses in the target (one level deep).
    link_pattern = re.compile(r"\[.*?\]\(((?:[^()]+|\([^()]*\))*)\)")
    print(f"Validando links em {len(files)} arquivos...")
    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        current_dir = os.path.dirname(file_path)
        for link in link_pattern.findall(content):
            # Skip external links and pure anchors; a colon anywhere in the
            # target is treated as a URL scheme (e.g. "https:", "mailto:").
            if link.startswith(("http", "#")) or ":" in link:
                continue
            # Strip any anchor fragment from the local link.
            clean_link = link.split("#")[0]
            if not clean_link:
                continue
            target_path = os.path.abspath(os.path.join(current_dir, clean_link))
            if not os.path.exists(target_path):
                broken_links.append({
                    "file": file_path,
                    "link": link,
                    "target": target_path,
                })
    if broken_links:
        print(f"\nEncontrados {len(broken_links)} links quebrados:")
        for bl in broken_links:
            print(f"Arquivo: {bl['file']} -> Link: {bl['link']}")
    else:
        print("\nNenhum link quebrado encontrado! 🎉")
    # Returning the list (previously implicit None) lets callers and tests
    # inspect the result; interactive/script usage is unchanged.
    return broken_links
# Script entry point: run the link check when executed directly.
if __name__ == "__main__":
    check_links()