56 lines
1.9 KiB
Python
56 lines
1.9 KiB
Python
|
|
import os
|
||
|
|
import glob
|
||
|
|
import re
|
||
|
|
|
||
|
|
# Substrings that mark a block as JavaScript under the Fluig inference rules.
_JS_MARKERS = (
    "DatasetBuilder",
    "createDataset",
    "displayFields",
    "getSelectedZoomItem",
    "parent_child",
)

# Substrings that mark a block as Java.
_JAVA_MARKERS = ("PreparedStatement", "ResultSet", "DriverManager", "import java.")

# Whole-word, case-insensitive SQL keywords. Word boundaries keep identifiers
# such as ``querySelector`` or ``fromIndex`` from being mistaken for SQL.
_SQL_SELECT = re.compile(r"\bSELECT\b", re.IGNORECASE)
_SQL_FROM = re.compile(r"\bFROM\b", re.IGNORECASE)

# A fenced code block:
#   group 1 - info string (language) on the opening fence line
#   group 2 - body, or None when the block is empty
#   group 3 - indentation in front of the closing fence
_FENCED_BLOCK = re.compile(
    r"```([^\n`]*)\n"  # info string is confined to the opening line
    r"(?:(.*?)\n)??"   # lazily optional body: an empty block closes immediately
    r"([ \t]*)```",    # closing fence, possibly indented (e.g. inside a list)
    re.DOTALL,
)


def _infer_language(lang, code):
    """Return the language label to use for a block with body *code*.

    Only blocks with no label, or labelled ``java``/``javascript``, are
    re-examined; any other label is returned unchanged. The rules are checked
    in order: JavaScript markers, Java markers, then SQL keywords. When no rule
    matches, the incoming label is kept.
    """
    if lang and lang not in ("java", "javascript"):
        return lang
    if any(marker in code for marker in _JS_MARKERS):
        return "javascript"
    if any(marker in code for marker in _JAVA_MARKERS):
        return "java"
    if _SQL_SELECT.search(code) and _SQL_FROM.search(code):
        return "sql"
    return lang


def sanitize_code_blocks(markdown_content):
    """Infer the language of fenced code blocks that lack a reliable label.

    Every fenced block in *markdown_content* is rewritten as
    ``<fence><lang>``, body, closing fence, where ``lang`` is the stripped
    info string, possibly replaced by the inferred language (see
    ``_infer_language``). Text outside fenced blocks is not modified.

    Args:
        markdown_content: Markdown text to process.

    Returns:
        The Markdown text with the code-fence labels updated.
    """

    def replace_code(match):
        code = match.group(2)
        if code is None:
            # Empty block: nothing to infer from, keep it exactly as written.
            return match.group(0)
        lang = _infer_language(match.group(1).strip(), code)
        # Re-emit the closing fence with its original indentation.
        return f"```{lang}\n{code}\n{match.group(3)}```"

    return _FENCED_BLOCK.sub(replace_code, markdown_content)
|
||
|
|
|
||
|
|
def reprocess_all(docs_dir="fluig_rag_docs"):
    """Re-run the Markdown clean-up passes over every ``.md`` file in a tree.

    Files are discovered recursively under *docs_dir*. Any file whose path
    contains ``"Biblioteca de Snippets"`` is skipped. Each remaining file is
    read as UTF-8, passed through ``sanitize_code_blocks``, has residual
    ``"> !!!"`` admonition prefixes turned into ``"!!!"``, and is written back
    only when its content actually changed. Progress is printed to stdout.

    Args:
        docs_dir: Root directory to scan. Defaults to ``"fluig_rag_docs"``.

    Returns:
        The number of files that were rewritten.
    """
    files = glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True)

    changed_count = 0
    print(f"Reprocessando {len(files)} arquivos...")

    for file_path in files:
        # Paths that mention the snippet library are left untouched.
        if "Biblioteca de Snippets" in file_path:
            continue

        with open(file_path, "r", encoding="utf-8") as f:
            original_content = f.read()

        # 1. Sanitize fenced code blocks (language inference).
        content = sanitize_code_blocks(original_content)

        # 2. Fix residual admonitions that were left inside blockquotes.
        content = content.replace("> !!!", "!!!")

        # Only touch the file on disk when something actually changed.
        if content != original_content:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
            changed_count += 1

    print(f"\nFinalizado! {changed_count} arquivos foram atualizados com melhorias semânticas.")
    return changed_count
|
||
|
|
|
||
|
|
# Script entry point: reprocess the documentation tree only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    reprocess_all()
|