"""Build a per-language snippet library from fenced code blocks in Markdown docs."""

import glob
import os
import re

# Name of the generated sub-directory. It is also used to skip files whose
# path contains it, so the library never re-ingests its own output.
_OUTPUT_DIR_NAME = "Biblioteca de Snippets"

# Fence info strings that are collected; every other language is ignored.
_SUPPORTED_LANGUAGES = ("javascript", "java", "sql")

# Fenced block: opening fence plus info string up to the end of the line, the
# body, then a closing fence located right after a newline. Using a
# look-behind (instead of consuming a newline before the fence) lets an empty
# block close on the newline that ends the info string, rather than
# swallowing unrelated text up to the next fence in the document.
_CODE_BLOCK_RE = re.compile(r"```([^\n]*)\n(.*?)(?<=\n)```", re.DOTALL)

# First "title: ..." occurrence in the document is used as its display name.
_TITLE_RE = re.compile(r"title: (.*)")


def extract_snippets(docs_dir: str = "fluig_rag_docs") -> None:
    """Collect fenced code blocks from Markdown files into snippet files.

    Every ``*.md`` file under ``docs_dir`` (searched recursively) is scanned
    for fenced code blocks whose info string is ``javascript``, ``java`` or
    ``sql`` (case-insensitive). For each language with at least one snippet,
    a ``Snippets <LANG>.md`` file is written to
    ``<docs_dir>/Biblioteca de Snippets``; each snippet is preceded by a
    heading naming the document it came from.

    Args:
        docs_dir: Root directory of the Markdown documentation. The output
            directory is created inside it if it does not exist.

    Returns:
        None. Results are written to disk and progress is printed to stdout.
    """
    output_dir = os.path.join(docs_dir, _OUTPUT_DIR_NAME)
    os.makedirs(output_dir, exist_ok=True)

    # glob returns paths in arbitrary order; sorting keeps the generated
    # files identical from one run (and one machine) to the next.
    files = sorted(glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True))
    snippets = {lang: [] for lang in _SUPPORTED_LANGUAGES}

    print(f"Extraindo snippets de {len(files)} arquivos...")

    for file_path in files:
        # Skip previously generated snippet files.
        if _OUTPUT_DIR_NAME in file_path:
            continue

        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Fall back to the file name when there is no title or it is blank.
        title_match = _TITLE_RE.search(content)
        doc_title = title_match.group(1).strip() if title_match else ""
        if not doc_title:
            doc_title = os.path.basename(file_path)

        for info, code in _CODE_BLOCK_RE.findall(content):
            lang = info.strip().lower()
            code = code.strip()
            # Empty blocks carry no reusable content, so they are left out.
            if lang in snippets and code:
                snippets[lang].append({"source": doc_title, "code": code})

    # Write one snippet file per language that produced results.
    for lang, items in snippets.items():
        if not items:
            continue

        parts = [f"# Biblioteca de Snippets: {lang.upper()}\n\n"]
        for item in items:
            parts.append(f"## Origem: {item['source']}\n")
            parts.append(f"```{lang}\n{item['code']}\n```\n\n")

        snippet_file = os.path.join(output_dir, f"Snippets {lang.upper()}.md")
        with open(snippet_file, "w", encoding="utf-8") as f:
            f.writelines(parts)

    print(f"Snippets gerados em {output_dir}")


if __name__ == "__main__":
    extract_snippets()