feat: upgrade to async extractor, add RAG processing, link healing and Docker support
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
|
||||
def sanitize_code_blocks(markdown_content):
    """Infer the language tag of fenced code blocks in Markdown text.

    Every fenced block (three backticks, an optional info string, a body
    and a closing fence at the start of a line) is re-emitted with its
    info string stripped of surrounding whitespace. Blocks whose info
    string is empty, ``java`` or ``javascript`` are re-classified from
    their content, first rule that matches wins:

    1. Fluig dataset/form API names  -> ``javascript``
    2. JDBC names or ``import java.`` -> ``java``
    3. a ``SELECT ... FROM`` statement -> ``sql``

    Blocks with any other info string keep it. Empty blocks (opening
    fence immediately followed by the closing fence) are left untouched.

    Args:
        markdown_content: Markdown source text.

    Returns:
        The text with the language tags of fenced blocks updated.
    """
    fluig_js_markers = (
        "DatasetBuilder",
        "createDataset",
        "displayFields",
        "getSelectedZoomItem",
        "parent_child",
    )
    java_markers = ("PreparedStatement", "ResultSet", "DriverManager", "import java.")

    # Whole words, in order: plain substring checks would also fire on
    # identifiers such as "selected" or "Array.from" and mislabel JavaScript.
    sql_query = re.compile(r"\bSELECT\b.*?\bFROM\b", re.IGNORECASE | re.DOTALL)

    # The info string is confined to the opening line. The body is either
    # empty or ends with a newline, so the closing fence always sits at the
    # start of a line. Accepting an empty body keeps an empty block from being
    # paired with the opening fence of the *next* block.
    fence = re.compile(r"```([^\n]*)\n(.*?)(?<=\n)```", re.DOTALL)

    def replace_code(match):
        body = match.group(2)
        if not body:
            # Empty block: nothing to classify, keep the text as it is.
            return match.group(0)

        lang = match.group(1).strip()
        code = body[:-1]  # drop the newline that precedes the closing fence

        # Inference rules for Fluig documentation.
        if lang in ("", "java", "javascript"):
            if any(marker in code for marker in fluig_js_markers):
                lang = "javascript"
            elif any(marker in code for marker in java_markers):
                lang = "java"
            elif sql_query.search(code):
                lang = "sql"

        return f"```{lang}\n{code}\n```"

    return fence.sub(replace_code, markdown_content)
|
||||
|
||||
def reprocess_all():
    """Re-run the Markdown clean-up over every document under ``fluig_rag_docs``.

    All ``*.md`` files below that directory are collected recursively. Files
    whose path contains ``Biblioteca de Snippets`` are skipped. Each remaining
    file is read as UTF-8, passed through ``sanitize_code_blocks`` and has
    every ``> !!!`` replaced by ``!!!``. A file is rewritten only when the
    resulting text differs from what was read. Progress and the number of
    rewritten files are printed to stdout.
    """
    markdown_glob = os.path.join("fluig_rag_docs", "**", "*.md")
    files = glob.glob(markdown_glob, recursive=True)
    print(f"Reprocessando {len(files)} arquivos...")

    changed_count = 0
    for md_path in files:
        # The snippet library is left exactly as it is.
        if "Biblioteca de Snippets" in md_path:
            continue

        with open(md_path, "r", encoding="utf-8") as source:
            before = source.read()

        # Step 1: fix code-block language tags.
        # Step 2: unquote leftover admonition markers ("> !!!" -> "!!!").
        after = sanitize_code_blocks(before).replace("> !!!", "!!!")
        if after == before:
            continue

        with open(md_path, "w", encoding="utf-8") as target:
            target.write(after)
        changed_count += 1

    print(f"\nFinalizado! {changed_count} arquivos foram atualizados com melhorias semânticas.")
|
||||
|
||||
# Run the reprocessing pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    reprocess_all()
|
||||
Reference in New Issue
Block a user