import os
import mysql.connector
from PyPDF2 import PdfReader
from dotenv import load_dotenv

load_dotenv()

def get_db():
    """Open and return a new MySQL connection configured from the environment.

    The host, user, password and database name are read from the
    ``DB_HOST``, ``DB_USER``, ``DB_PASS`` and ``DB_NAME`` environment
    variables. A variable that is not set is passed through as ``None``.
    """
    settings = {
        "host": os.getenv("DB_HOST"),
        "user": os.getenv("DB_USER"),
        "password": os.getenv("DB_PASS"),
        "database": os.getenv("DB_NAME"),
    }
    return mysql.connector.connect(**settings)

def chunk_text(text, size=1000, overlap=200):
    """Split *text* into overlapping chunks of at most *size* characters.

    Consecutive chunks start ``size - overlap`` characters apart, so each
    chunk repeats the last *overlap* characters of the previous one. The
    final chunk(s) may be shorter than *size*.

    Args:
        text: The string to split.
        size: Maximum length of each chunk; must be positive.
        overlap: Number of characters shared between neighbouring chunks;
            must satisfy ``0 <= overlap < size``.

    Returns:
        A list of chunk strings (empty when *text* is empty).

    Raises:
        ValueError: If *size* is not positive or *overlap* is outside
            ``[0, size)``.
    """
    if size <= 0:
        raise ValueError(f"size must be positive, got {size}")
    # overlap >= size would make the step zero (range() error) or negative
    # (range() yields nothing, silently dropping the whole text); a negative
    # overlap would leave gaps between chunks.
    if not 0 <= overlap < size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < size, got overlap={overlap}, size={size}"
        )

    step = size - overlap
    return [text[start:start + size] for start in range(0, len(text), step)]

def process_pdfs(directory):
    """Rebuild the ``knowledge_chunks`` table from the PDF files in *directory*.

    All existing rows are deleted first. Then, for every file whose name ends
    in ``.pdf`` (case-insensitive), the text of all pages is extracted, split
    with ``chunk_text`` and inserted one row per chunk. Each file is committed
    as its own transaction; a file that fails is reported, rolled back and
    skipped so the remaining files are still processed.

    Args:
        directory: Path of the folder to scan (not recursive).
    """
    db = get_db()
    try:
        cursor = db.cursor()
        try:
            # Clear old chunks to avoid duplicates if re-running. Commit right
            # away so a later per-file rollback cannot undo the clean-up.
            cursor.execute("DELETE FROM knowledge_chunks")
            db.commit()

            for filename in os.listdir(directory):
                if not filename.lower().endswith(".pdf"):
                    continue

                print(f"📄 Processing {filename}...")
                filepath = os.path.join(directory, filename)
                try:
                    reader = PdfReader(filepath)
                    # `or ""` guards against a page for which the extractor
                    # returns nothing usable instead of a string.
                    full_text = "".join(
                        (page.extract_text() or "") + "\n"
                        for page in reader.pages
                    )

                    for chunk in chunk_text(full_text):
                        cursor.execute(
                            "INSERT INTO knowledge_chunks (source_file, content) VALUES (%s, %s)",
                            (filename, chunk)
                        )
                    db.commit()
                except Exception as e:
                    # Best-effort per file: report, then discard any rows of
                    # this file that were inserted before the failure so they
                    # are not committed together with the next file.
                    print(f"❌ Error processing {filename}: {e}")
                    db.rollback()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if listing the directory or
        # the initial DELETE fails.
        db.close()
    print("🎉 Knowledge Base updated with chunks!")

# Final lines of process_kurss.py (script entry point):

if __name__ == "__main__":
    # os.path.dirname(__file__) is the folder that contains this script (i.e. /bot/)
    kurss_dir = os.path.join(os.path.dirname(__file__), "knowledge_base", "KURSS")

    # isdir rather than exists: a regular file at this path would pass an
    # existence check and then fail inside os.listdir().
    if os.path.isdir(kurss_dir):
        process_pdfs(kurss_dir)
    else:
        print(f"Directory not found: {kurss_dir}")