"""Post-process the Fluig RAG Markdown docs.

Two clean-ups are applied to every ``*.md`` file under the docs directory:

1. Fenced code blocks with no language (or a ``java``/``javascript`` label)
   get their language re-inferred from the block's content.
2. Leftover block-quoted admonition markers (``> !!!``) are turned back into
   plain ``!!!`` markers.
"""
import glob
import os
import re

# A fenced code block: an (optionally indented) opening fence at the start of
# a line, a single-line info string that cannot contain a newline or a
# backtick, a lazily matched body (possibly empty), and a closing fence at the
# start of a later line.  Keeping the info string on one line is what stops an
# empty block from swallowing the text and fences that follow it.
_FENCE_RE = re.compile(
    r"^(?P<indent>[ \t]*)```(?P<info>[^\n`]*)\n"
    r"(?P<body>.*?)"
    r"(?P<close>^[ \t]*```)",
    re.DOTALL | re.MULTILINE,
)

# SELECT ... FROM as whole words, so identifiers such as ``selected`` or
# ``Array.from`` are not mistaken for SQL.
_SQL_RE = re.compile(r"\bSELECT\b.*?\bFROM\b", re.IGNORECASE | re.DOTALL)

# Labels whose language is (re-)inferred; any other label is trusted as-is.
_RELABEL_CANDIDATES = frozenset({"", "java", "javascript"})

# Substrings that identify Fluig server/client-side JavaScript.
_JS_MARKERS = (
    "DatasetBuilder",
    "createDataset",
    "displayFields",
    "getSelectedZoomItem",
    "parent_child",
)

# Substrings that identify plain Java (mostly JDBC usage).
_JAVA_MARKERS = (
    "PreparedStatement",
    "ResultSet",
    "DriverManager",
    "import java.",
)


def _infer_language(lang: str, code: str) -> str:
    """Return the label a block should carry given its current label and body.

    Only unlabeled blocks and blocks labeled ``java``/``javascript`` are
    re-evaluated.  The checks run in priority order: Fluig JavaScript markers,
    then Java markers, then a ``SELECT ... FROM`` statement.  When nothing
    matches, the current label is returned unchanged.
    """
    if lang not in _RELABEL_CANDIDATES:
        return lang
    if any(marker in code for marker in _JS_MARKERS):
        return "javascript"
    if any(marker in code for marker in _JAVA_MARKERS):
        return "java"
    if _SQL_RE.search(code):
        return "sql"
    return lang


def sanitize_code_blocks(markdown_content: str) -> str:
    """Infer the language of fenced code blocks that lack a reliable label.

    Args:
        markdown_content: Full text of a Markdown document.

    Returns:
        The document with the info string of each triple-backtick fenced
        block replaced by its stripped, possibly re-inferred, language.
        Block bodies and closing fences are left untouched.
    """

    def relabel(match: "re.Match[str]") -> str:
        body = match.group("body")
        lang = _infer_language(match.group("info").strip(), body)
        # Only the opening fence line is rewritten; the body (including its
        # trailing newline) and the closing fence are copied verbatim.
        return f"{match.group('indent')}```{lang}\n{body}{match.group('close')}"

    return _FENCE_RE.sub(relabel, markdown_content)


def reprocess_all(docs_dir: str = "fluig_rag_docs") -> int:
    """Re-run the clean-ups over every Markdown file below *docs_dir*.

    Files whose path contains ``"Biblioteca de Snippets"`` are skipped.  A file
    is rewritten only when its content actually changed.

    Args:
        docs_dir: Root directory searched recursively for ``*.md`` files.

    Returns:
        The number of files that were rewritten.
    """
    files = glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True)
    changed_count = 0

    print(f"Reprocessando {len(files)} arquivos...")

    for file_path in files:
        if "Biblioteca de Snippets" in file_path:
            continue

        with open(file_path, "r", encoding="utf-8") as f:
            original_content = f.read()

        # 1. Re-label code fences.
        content = sanitize_code_blocks(original_content)

        # 2. Fix residual admonitions (> !!! -> !!!).
        content = content.replace("> !!!", "!!!")

        if content != original_content:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
            changed_count += 1

    print(f"\nFinalizado! {changed_count} arquivos foram atualizados com melhorias semânticas.")
    return changed_count


if __name__ == "__main__":
    reprocess_all()