"""Repair broken relative links inside the Markdown files of a docs tree."""

import glob
import os
import re

# Markdown inline link "[text](target)". The target may contain one level of
# nested parentheses, e.g. "Page_(v2).md".
_LINK_PATTERN = re.compile(r"\[(.*?)\]\(((?:[^()]+|\([^()]*\))*)\)")


def _is_external(link: str) -> bool:
    """Return True for links that must never be rewritten (URLs, mailto:, ...)."""
    # Only real URL prefixes count here; a bare "http" prefix would also skip
    # local files such as "http-notes.md".
    if link.startswith(("http://", "https://")):
        return True
    # Any other "scheme:" style link is external unless it is explicitly
    # relative ("./x" or "../x").
    return ":" in link and not link.startswith(".")


def _relative_link(abs_path: str, current_dir: str) -> str:
    """Return *abs_path* relative to *current_dir* using forward slashes."""
    return os.path.relpath(abs_path, current_dir).replace(os.sep, "/")


def _resolve_link(link: str, current_dir: str, known_files: list) -> "str | None":
    """Return a repaired replacement for *link*, or None if nothing applies.

    Args:
        link: Raw link target as written in the Markdown file.
        current_dir: Directory of the file containing the link.
        known_files: Sorted ``(absolute_path, lowercase_basename)`` pairs for
            every regular file in the docs tree.
    """
    # partition() keeps the complete fragment even when it contains more
    # than one "#".
    path_part, hash_sep, fragment = link.partition("#")
    anchor = hash_sep + fragment

    # Pure in-page anchors ("#section") have nothing to resolve.
    if not path_part:
        return None

    def exists(rel: str) -> bool:
        return os.path.exists(os.path.abspath(os.path.join(current_dir, rel)))

    # A link that already resolves is left untouched, even if it goes through
    # a directory whose name really ends in ".md".
    if exists(path_part):
        return None

    # Fix 1: drop a stray ".md" in the middle of the path (left over from a
    # partial substitution), e.g. "Path.md/Subpath.md" -> "Path/Subpath.md".
    if ".md/" in path_part:
        path_part = path_part.replace(".md/", "/")
        if exists(path_part):
            return path_part + anchor

    # Fix 2: try adding the missing extension and/or a closing parenthesis.
    candidates = (
        path_part + ".md",
        path_part + ").md",
        path_part + ")",
        path_part.rstrip("(") + ").md",
    )
    for candidate in candidates:
        candidate_path = os.path.abspath(os.path.join(current_dir, candidate))
        if os.path.exists(candidate_path):
            return _relative_link(candidate_path, current_dir) + anchor

    # Fix 3: approximate search by file name across the whole tree. An exact
    # name match (with or without extension) beats a substring match; the
    # sorted index makes the chosen substring match deterministic.
    wanted = os.path.basename(path_part).lower().strip()
    if not wanted:
        return None

    best = None
    for abs_path, name in known_files:
        if name == wanted or os.path.splitext(name)[0] == wanted:
            best = abs_path
            break
        if best is None and wanted in name:
            best = abs_path

    if best is None:
        return None
    return _relative_link(best, current_dir) + anchor


def _heal_text(content: str, current_dir: str, known_files: list) -> tuple:
    """Return ``(new_content, healed)`` after repairing links in *content*."""
    healed = 0

    def replace_link(match):
        nonlocal healed
        text, link = match.group(1), match.group(2)
        if _is_external(link):
            return match.group(0)
        repaired = _resolve_link(link, current_dir, known_files)
        if repaired is None:
            return match.group(0)
        healed += 1
        return f"[{text}]({repaired})"

    return _LINK_PATTERN.sub(replace_link, content), healed


def heal_links(docs_dir: str = "fluig_rag_docs") -> int:
    """Repair broken relative Markdown links under *docs_dir*.

    Every ``*.md`` file in the tree is scanned for inline links. A link whose
    target does not exist is repaired, in order, by removing a stray ``.md/``
    path segment, by appending ``.md`` and/or a closing parenthesis, or by
    searching the tree for a file with a matching name. Files are rewritten
    in place only when their content changed; line endings are preserved.

    Args:
        docs_dir: Root directory of the documentation tree.

    Returns:
        The number of links that were rewritten.
    """
    # 1. Index every regular file that actually exists in the tree.
    all_files = sorted(
        path
        for path in glob.glob(os.path.join(docs_dir, "**", "*"), recursive=True)
        if os.path.isfile(path)
    )
    known_files = [
        (os.path.abspath(path), os.path.basename(path).lower())
        for path in all_files
    ]
    # Built from regular files only: a directory named "Something.md" must not
    # be opened as a Markdown file.
    md_files = [path for path in all_files if path.endswith(".md")]

    healed_count = 0
    print(f"Iniciando cura profunda de links em {len(md_files)} arquivos...")

    for file_path in md_files:
        # newline="" disables newline translation so CRLF files stay CRLF.
        with open(file_path, "r", encoding="utf-8", newline="") as f:
            content = f.read()

        new_content, healed = _heal_text(
            content, os.path.dirname(file_path), known_files
        )
        healed_count += healed

        if new_content != content:
            with open(file_path, "w", encoding="utf-8", newline="") as f:
                f.write(new_content)

    print(f"\nCura concluída! {healed_count} links foram analisados/corrigidos.")
    return healed_count


if __name__ == "__main__":
    heal_links()