feat: upgrade to async extractor, add RAG processing, link healing and Docker support

This commit is contained in:
rodolpho
2026-05-07 18:43:43 -03:00
parent 68dc35abbd
commit 570292d8a9
116 changed files with 16277 additions and 388 deletions
+55
View File
@@ -0,0 +1,55 @@
import os
import glob
import re
# Substrings whose presence makes a block be labelled ``javascript``.
_JAVASCRIPT_MARKERS = (
    "DatasetBuilder",
    "createDataset",
    "displayFields",
    "getSelectedZoomItem",
    "parent_child",
)

# Substrings whose presence makes a block be labelled ``java``.
_JAVA_MARKERS = (
    "PreparedStatement",
    "ResultSet",
    "DriverManager",
    "import java.",
)

# Fence labels that are (re)inferred from the block's content; any other
# label is trusted and kept as written.
_INFERABLE_LANGS = ("", "java", "javascript")

# Opening fence + info string, an optional body, then the closing fence.
# The body group is *lazily* optional so that an empty block (opening fence
# immediately followed by the closing fence) closes on its own fence instead
# of pairing with the opening fence of the next block in the document.
_CODE_FENCE_RE = re.compile(r"```(.*?)\n(?:(.*?)\n)??```", re.DOTALL)


def sanitize_code_blocks(markdown_content: str) -> str:
    """Infer the language label of fenced code blocks in a Markdown string.

    Blocks whose fence has no label, or is labelled ``java`` or
    ``javascript``, are inspected and relabelled using the first rule that
    matches:

    1. contains one of the JavaScript markers -> ``javascript``
    2. contains one of the Java markers       -> ``java``
    3. contains both ``SELECT`` and ``FROM`` (case-insensitive) -> ``sql``

    If no rule matches, the existing label is kept. Blocks carrying any other
    label are left alone. Surrounding whitespace in the fence's info string
    is stripped when the fence is rewritten.

    Args:
        markdown_content: Markdown text to process.

    Returns:
        The Markdown text with the fence labels updated.
    """

    def replace_code(match):
        lang = match.group(1).strip()
        code = match.group(2)

        if code is None:
            # Empty block: there is no content to inspect, so only the
            # fence pair is emitted (with the normalised label).
            return f"```{lang}\n```"

        # Inference logic for Fluig content.
        if lang in _INFERABLE_LANGS:
            if any(marker in code for marker in _JAVASCRIPT_MARKERS):
                lang = "javascript"
            elif any(marker in code for marker in _JAVA_MARKERS):
                lang = "java"
            else:
                upper_code = code.upper()
                if "SELECT" in upper_code and "FROM" in upper_code:
                    lang = "sql"

        return f"```{lang}\n{code}\n```"

    return _CODE_FENCE_RE.sub(replace_code, markdown_content)
def reprocess_all(docs_dir="fluig_rag_docs"):
    """Re-run the Markdown clean-up passes over every ``.md`` file in a tree.

    Each file found recursively under *docs_dir* is read as UTF-8, passed
    through ``sanitize_code_blocks``, and has every ``"> !!!"`` occurrence
    replaced by ``"!!!"``. A file is written back only when its content
    actually changed. Files whose path contains ``"Biblioteca de Snippets"``
    are skipped.

    Progress and the final number of rewritten files are printed to stdout.

    Args:
        docs_dir: Root directory to scan. Defaults to ``"fluig_rag_docs"``,
            resolved relative to the current working directory.
    """
    files = glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True)
    changed_count = 0
    print(f"Reprocessando {len(files)} arquivos...")

    for file_path in files:
        # Files under the snippet library are intentionally left untouched.
        if "Biblioteca de Snippets" in file_path:
            continue

        with open(file_path, "r", encoding="utf-8") as f:
            original_content = f.read()

        # 1. Code-block sanitisation (language inference).
        content = sanitize_code_blocks(original_content)
        # 2. Fix leftover admonitions that are still quoted ("> !!!" -> "!!!").
        content = content.replace("> !!!", "!!!")

        # Only touch the file on disk when something changed.
        if content != original_content:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
            changed_count += 1

    print(f"\nFinalizado! {changed_count} arquivos foram atualizados com melhorias semânticas.")
# Script entry point: run the reprocessing pass only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    reprocess_all()