91 lines
3.4 KiB
Python
91 lines
3.4 KiB
Python
import os
|
|
import glob
|
|
import re
|
|
|
|
def _is_external_link(link):
    """Return True when *link* is not a local path and must be left alone.

    A link counts as external when it starts with a URI scheme
    (``https:``, ``mailto:``, ``tel:`` ...) or is protocol-relative
    (``//host/path``). Testing for a real scheme, rather than the bare
    prefix ``http``, keeps local files such as ``http-notes.md`` healable.
    """
    if link.startswith("//"):
        return True
    return re.match(r"[A-Za-z][A-Za-z0-9+.-]*:", link) is not None


def _resolve_link_path(path_part, current_dir, indexed_files):
    """Find the path text that makes a local link point at something real.

    Args:
        path_part: Link target without its ``#fragment``.
        current_dir: Directory of the Markdown file that contains the link.
        indexed_files: ``(lowercased basename, absolute path)`` pairs, one
            per regular file found under the docs tree.

    Returns:
        ``path_part`` itself when it already resolves, a repaired path when
        one of the fixes below finds an existing target, or ``None`` when
        nothing matches.
    """

    def exists(rel_path):
        return os.path.exists(os.path.abspath(os.path.join(current_dir, rel_path)))

    def relative(abs_path):
        # Markdown links always use forward slashes, even on Windows.
        return os.path.relpath(abs_path, current_dir).replace("\\", "/")

    # Fix 1: ".md/" in the middle of a path is the residue of a partial
    # substitution (Path.md/Subpath.md -> Path/Subpath.md). The untouched
    # path is tried first so that a real directory named "x.md" keeps working.
    variants = [path_part]
    without_md_dirs = path_part.replace(".md/", "/")
    if without_md_dirs != path_part:
        variants.append(without_md_dirs)
    for variant in variants:
        if exists(variant):
            return variant

    # Fix 2: the extension and/or a closing parenthesis was lost.
    for variant in variants:
        candidates = (
            variant + ".md",
            variant + ").md",
            variant + ")",
            variant.rstrip("(") + ").md",
        )
        for candidate in candidates:
            if exists(candidate):
                return relative(os.path.abspath(os.path.join(current_dir, candidate)))

    # Fix 3: approximate search by file name anywhere in the docs tree.
    wanted = os.path.basename(path_part).lower().strip()
    if not wanted:
        return None
    first_partial = None
    for name, abs_path in indexed_files:
        # An exact file name (with or without extension) always beats a
        # substring hit, so the result does not depend on directory order
        # when e.g. both "Setup.md" and "OldSetup.md" exist.
        if name == wanted or os.path.splitext(name)[0] == wanted:
            return relative(abs_path)
        if first_partial is None and wanted in name:
            first_partial = abs_path
    if first_partial is None:
        return None
    return relative(first_partial)


def heal_links(docs_dir="fluig_rag_docs"):
    """Repair broken relative links in every Markdown file under *docs_dir*.

    Each ``[text](target)`` occurrence whose target is a local path is
    checked against the file system. Targets that do not resolve are
    repaired, in order, by dropping a stray ``.md/`` path segment, by
    appending a missing ``.md`` / ``)``, and finally by searching the whole
    tree for a file with a matching name. External links, pure ``#anchor``
    links and links that cannot be repaired are left untouched. A file is
    rewritten only when at least one of its links changed.

    Args:
        docs_dir: Root directory of the documentation tree to scan.

    Returns:
        None. Progress and the number of repaired links are printed.
    """
    # 1. Map every real file that exists. The glob also yields directories,
    #    and a directory may itself be named "something.md", so only regular
    #    files are kept before choosing what to open.
    all_entries = glob.glob(os.path.join(docs_dir, "**", "*"), recursive=True)
    files = [entry for entry in all_entries if os.path.isfile(entry)]
    indexed_files = [
        (os.path.basename(path).lower(), os.path.abspath(path)) for path in files
    ]
    md_files = [path for path in files if path.endswith(".md")]

    # Regex that supports nested parentheses (one level deep) in the target.
    link_pattern = re.compile(r"\[(.*?)\]\(((?:[^()]+|\([^()]*\))*)\)")

    healed_count = 0
    print(f"Iniciando cura profunda de links em {len(md_files)} arquivos...")

    for file_path in md_files:
        # newline="" disables newline translation so a rewritten file keeps
        # its original line endings (CRLF stays CRLF).
        with open(file_path, "r", encoding="utf-8", newline="") as f:
            content = f.read()

        current_dir = os.path.dirname(file_path)

        def replace_link(match):
            nonlocal healed_count
            text = match.group(1)
            link = match.group(2)

            # Skip external links.
            if _is_external_link(link):
                return match.group(0)

            # Split off the anchor; partition keeps everything after the
            # first "#" so "page#a#b" is not truncated to "#a".
            path_part, hash_sep, fragment = link.partition("#")
            if not path_part:
                return match.group(0)

            healed_path = _resolve_link_path(path_part, current_dir, indexed_files)
            if healed_path is None or healed_path == path_part:
                return match.group(0)

            healed_count += 1
            return f"[{text}]({healed_path}{hash_sep}{fragment})"

        new_content = link_pattern.sub(replace_link, content)

        if new_content != content:
            with open(file_path, "w", encoding="utf-8", newline="") as f:
                f.write(new_content)

    print(f"\nCura concluída! {healed_count} links foram analisados/corrigidos.")
|
|
|
|
# Script entry point: repair the documentation links only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    heal_links()
|