feat: upgrade to async extractor, add RAG processing, link healing and Docker support
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
|
||||
def extract_snippets(docs_dir="fluig_rag_docs"):
    """Collect fenced code blocks from Markdown docs into per-language snippet files.

    Recursively scans ``docs_dir`` for ``*.md`` files, extracts fenced code
    blocks whose language tag is one of javascript/java/sql, and writes one
    aggregated Markdown file per language into
    ``<docs_dir>/Biblioteca de Snippets``. Each snippet is annotated with the
    title of the document it came from (the ``title:`` front-matter line, or
    the file name as a fallback).

    Args:
        docs_dir: Root directory of the Markdown documentation. Defaults to
            the original hard-coded location, so existing callers are
            unaffected.
    """
    output_dir = os.path.join(docs_dir, "Biblioteca de Snippets")
    os.makedirs(output_dir, exist_ok=True)

    files = glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True)
    snippets = {"javascript": [], "java": [], "sql": []}

    # Fenced code blocks: ```lang\n<code>\n``` (DOTALL so code may span lines).
    code_pattern = re.compile(r"```(.*?)\n(.*?)\n```", re.DOTALL)

    print(f"Extraindo snippets de {len(files)} arquivos...")

    for file_path in files:
        # Skip previously generated snippet files so reruns don't re-ingest them.
        if "Biblioteca de Snippets" in file_path:
            continue

        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Anchored at line start (MULTILINE) so e.g. a "subtitle:" line or
        # "title:" appearing mid-paragraph cannot be mistaken for front matter.
        title_match = re.search(r"^title: (.*)", content, re.MULTILINE)
        doc_title = title_match.group(1) if title_match else os.path.basename(file_path)

        for lang, code in code_pattern.findall(content):
            lang = lang.strip().lower()
            if lang in snippets:
                snippets[lang].append({
                    "source": doc_title,
                    "code": code.strip(),
                })

    # One output file per language; languages with no snippets are skipped.
    for lang, items in snippets.items():
        if not items:
            continue

        snippet_file = os.path.join(output_dir, f"Snippets {lang.upper()}.md")
        with open(snippet_file, "w", encoding="utf-8") as f:
            f.write(f"# Biblioteca de Snippets: {lang.upper()}\n\n")
            for item in items:
                f.write(f"## Origem: {item['source']}\n")
                f.write(f"```{lang}\n{item['code']}\n```\n\n")

    print(f"Snippets gerados em {output_dir}")
|
||||
|
||||
# Script entry point: run the snippet extraction only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    extract_snippets()
|
||||
Reference in New Issue
Block a user