feat: upgrade to async extractor, add RAG processing, link healing and Docker support
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
|
||||
def extract_snippets(docs_dir="fluig_rag_docs"):
    """Collect fenced code blocks from Markdown docs into per-language snippet files.

    Recursively scans ``docs_dir`` for ``*.md`` files, extracts fenced code
    blocks whose language tag is one of javascript/java/sql, and writes one
    aggregated Markdown file per language into
    ``<docs_dir>/Biblioteca de Snippets``. Each snippet is annotated with the
    title of the document it came from (the ``title:`` front-matter line, or
    the file name as a fallback).

    Args:
        docs_dir: Root directory of the Markdown documentation. Defaults to
            the original hard-coded location, so existing callers are
            unaffected.
    """
    output_dir = os.path.join(docs_dir, "Biblioteca de Snippets")
    os.makedirs(output_dir, exist_ok=True)

    files = glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True)
    snippets = {"javascript": [], "java": [], "sql": []}

    # Fenced code blocks: ```lang\n<code>\n``` (DOTALL so code may span lines).
    code_pattern = re.compile(r"```(.*?)\n(.*?)\n```", re.DOTALL)

    print(f"Extraindo snippets de {len(files)} arquivos...")

    for file_path in files:
        # Skip previously generated snippet files so reruns don't re-ingest them.
        if "Biblioteca de Snippets" in file_path:
            continue

        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Anchored at line start (MULTILINE) so e.g. a "subtitle:" line or
        # "title:" appearing mid-paragraph cannot be mistaken for front matter.
        title_match = re.search(r"^title: (.*)", content, re.MULTILINE)
        doc_title = title_match.group(1) if title_match else os.path.basename(file_path)

        for lang, code in code_pattern.findall(content):
            lang = lang.strip().lower()
            if lang in snippets:
                snippets[lang].append({
                    "source": doc_title,
                    "code": code.strip(),
                })

    # One output file per language; languages with no snippets are skipped.
    for lang, items in snippets.items():
        if not items:
            continue

        snippet_file = os.path.join(output_dir, f"Snippets {lang.upper()}.md")
        with open(snippet_file, "w", encoding="utf-8") as f:
            f.write(f"# Biblioteca de Snippets: {lang.upper()}\n\n")
            for item in items:
                f.write(f"## Origem: {item['source']}\n")
                f.write(f"```{lang}\n{item['code']}\n```\n\n")

    print(f"Snippets gerados em {output_dir}")
|
||||
|
||||
# Script entry point: run the snippet extraction only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    extract_snippets()
|
||||
Reference in New Issue
Block a user