feat: upgrade to async extractor, add RAG processing, link healing and Docker support
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
|
||||
def check_links(docs_dir="fluig_rag_docs"):
    """Validate relative Markdown links under *docs_dir*.

    Recursively scans every ``*.md`` file, extracts Markdown-style links,
    resolves each relative link against the containing file's directory,
    and reports any link whose target path does not exist on disk.

    Args:
        docs_dir: Root directory holding the Markdown docs. Defaults to
            ``"fluig_rag_docs"`` (the previously hard-coded location).

    Returns:
        list[dict]: One entry per broken link, with keys ``"file"``
        (source file), ``"link"`` (raw link text) and ``"target"``
        (absolute path that was checked). Empty when all links resolve.
    """
    files = glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True)
    broken_links = []

    # Markdown links: [text](target). The inner alternation tolerates one
    # level of nested parentheses inside the target.
    link_pattern = re.compile(r"\[.*?\]\(((?:[^()]+|\([^()]*\))*)\)")

    print(f"Validando links em {len(files)} arquivos...")

    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        current_dir = os.path.dirname(file_path)

        for link in link_pattern.findall(content):
            # Skip external URLs and pure anchors; the ":" test also
            # filters any other scheme-qualified link (mailto:, ftp:, ...).
            if link.startswith(("http", "#")) or ":" in link:
                continue

            # Drop the fragment; an empty remainder was an in-page anchor.
            clean_link = link.split("#")[0]
            if not clean_link:
                continue

            target_path = os.path.abspath(os.path.join(current_dir, clean_link))
            if not os.path.exists(target_path):
                broken_links.append({
                    "file": file_path,
                    "link": link,
                    "target": target_path,
                })

    if broken_links:
        print(f"\nEncontrados {len(broken_links)} links quebrados:")
        for bl in broken_links:
            print(f"Arquivo: {bl['file']} -> Link: {bl['link']}")
    else:
        print("\nNenhum link quebrado encontrado! 🎉")

    return broken_links
||||
# Script entry point: run the link validation when executed directly
# (not on import).
if __name__ == "__main__":
    check_links()
Reference in New Issue
Block a user