# Uploaded by A7m0d ("Upload folder using huggingface_hub"),
# commit 672d4d3 (verified).
import logging
import os
import sys
from pathlib import Path
from typing import Any, Dict, Optional

import yaml

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from .utilites import load_environment_variables, validate_api_keys
# The project logger is imported at this point (not with the other imports)
# to avoid a circular import; when it cannot be imported the module falls
# back to a plain standard-library logger with the same name.
_LOGGER_NAME = "config"

try:
    from logger.custom_logger import CustomLoggerTracker

    custom_log = CustomLoggerTracker()
    logger = custom_log.get_logger(_LOGGER_NAME)
except ImportError:
    # Custom logger package is unavailable: use standard logging instead.
    logger = logging.getLogger(_LOGGER_NAME)
class Config:
    """Application configuration read from a YAML file and the environment.

    The YAML document is loaded and validated once, at construction time.
    Individual settings are then exposed through :meth:`get` (dot-notation
    lookup) and through convenience properties. Credentials and endpoints
    are resolved with the precedence: environment variable, then YAML key,
    then a built-in default (an empty string for credentials).

    NOTE(review): earlier revisions embedded real-looking Groq, SiliconFlow
    and Qdrant API keys as fallback defaults. They were committed in source,
    so they should be treated as compromised and rotated. No credential is
    hard-coded here any more; a missing key now resolves to ``""``.
    """

    # Endpoint defaults used only when neither the environment nor the YAML
    # file provides a value. These are URLs, not credentials.
    _DEFAULT_GROQ_URL = "https://api.groq.com/openai/v1"
    # NOTE(review): deployment-specific cluster endpoint kept for backward
    # compatibility; consider moving it into config.yaml.
    _DEFAULT_QDRANT_URL = (
        "https://50f53cc8-bbb0-4939-8254-8f025a577222.us-west-2-0.aws.cloud.qdrant.io"
    )

    def __init__(self, config_path: str = "config.yaml"):
        """Load environment variables and the YAML file, then validate both.

        Args:
            config_path: Path of the YAML configuration file.

        Raises:
            FileNotFoundError: If ``config_path`` does not exist.
            yaml.YAMLError: If the file is not valid YAML.
            ValueError: If the document is not a mapping or misses a
                required section.
        """
        logger.info("Start Loading data from configs")
        load_environment_variables()
        self.config_path = Path(config_path)
        self.config = self._load_config()
        self._validate_config()

        # Missing API keys are reported but never fatal.
        api_validation = validate_api_keys()
        if not api_validation['valid']:
            logger.warning(f"Some API keys missing: {api_validation['missing_required']}")

    def _load_config(self) -> Dict[str, Any]:
        """Read and parse the YAML file at ``self.config_path``.

        Returns:
            The parsed document as a dictionary. An empty file yields ``{}``.

        Raises:
            FileNotFoundError: If the file does not exist.
            yaml.YAMLError: If the content cannot be parsed.
            ValueError: If the top-level YAML node is not a mapping.
        """
        # Checked outside the try block so the error is logged only once.
        if not self.config_path.exists():
            logger.error(f"Configuration file not found: {self.config_path}")
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")

        try:
            with open(self.config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
        except yaml.YAMLError as e:
            logger.error(f"Error parsing YAML configuration: {e}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error loading configuration: {e}")
            raise

        # safe_load returns None for an empty document; normalise it so that
        # validation reports a missing section instead of raising TypeError.
        if config is None:
            config = {}
        if not isinstance(config, dict):
            message = (
                "Configuration root must be a mapping, "
                f"got {type(config).__name__}: {self.config_path}"
            )
            logger.error(message)
            raise ValueError(message)

        logger.info(f"Configuration loaded successfully from {self.config_path}")
        return config

    def _section(self, name: str) -> Dict[str, Any]:
        """Return a shallow copy of section ``name``, or ``{}`` when unusable.

        A section that is absent, null, or not a mapping is treated as empty
        so callers can merge or inspect it without extra checks.
        """
        section = self.get(name, {})
        return dict(section) if isinstance(section, dict) else {}

    def _validate_config(self) -> None:
        """Validate configuration based on the actual YAML structure.

        Either the legacy ``gemini_model`` key or a ``models`` section must
        be present. A ``models`` section must define ``embedding_model`` and
        ``llm_model``. Missing optional keys are only logged.

        Raises:
            ValueError: If a required section or model key is missing.
        """
        has_gemini = 'gemini_model' in self.config
        has_models_section = 'models' in self.config

        if not has_gemini and not has_models_section:
            message = (
                "Missing required configuration: either 'gemini_model' or "
                "'models' section must be configured"
            )
            logger.error(message)
            raise ValueError(message)

        if has_models_section:
            models_config = self._section('models')
            for key in ('embedding_model', 'llm_model'):
                if key not in models_config:
                    logger.error(f"Missing required model configuration: models.{key}")
                    raise ValueError(f"Missing required model configuration: models.{key}")

        # rag_system keys are optional; note the absent ones at debug level.
        if 'rag_system' in self.config:
            rag_section = self._section('rag_system')
            for key in ('chunk_size', 'chunk_overlap', 'max_context_chunks'):
                if key not in rag_section:
                    logger.debug(f"Optional RAG configuration key not found: rag_system.{key}")

        vector_config = self._section('vector_store')
        if vector_config.get('provider') == 'qdrant' and 'collection_name' not in vector_config:
            logger.warning("Qdrant collection_name not specified, will use default")

        logger.info("Configuration validation passed")

    def get(self, key: str, default: Any = None) -> Any:
        """Get configuration value by key, supporting nested keys with dot notation.

        Args:
            key: Key such as ``"models.llm_model"``.
            default: Value returned when any path component is missing or a
                parent node cannot be indexed by that component.

        Returns:
            The stored value, or ``default`` when the path cannot be resolved.
        """
        value = self.config
        try:
            for part in key.split('.'):
                value = value[part]
        except (KeyError, TypeError):
            logger.debug(f"Config key '{key}' not found, returning default: {default}")
            return default
        # The value is deliberately not logged: it may be a credential.
        logger.debug(f"Retrieved config value for '{key}'")
        return value

    def get_env_var(self, key: str, required: bool = True) -> Optional[str]:
        """Read environment variable ``key``.

        Args:
            key: Name of the environment variable.
            required: When true, a missing or empty variable is an error.

        Returns:
            The variable's value; for a non-required variable that is unset
            or empty, whatever ``os.getenv`` returned (``None`` or ``""``).

        Raises:
            ValueError: If ``required`` is true and the variable is unset
                or empty.
        """
        value = os.getenv(key)
        if value:
            logger.info(f"Environment variable '{key}' loaded successfully")
            return value
        if required:
            logger.error(f"Required environment variable not found: {key}")
            raise ValueError(f"Required environment variable not found: {key}")
        logger.warning(f"Optional environment variable '{key}' not found")
        return value

    def _resolve_setting(self, env_var: str, config_key: str, default: str = "") -> str:
        """Resolve a setting from the environment, then the YAML file.

        Args:
            env_var: Environment variable consulted first.
            config_key: YAML key (dot notation allowed) consulted second.
            default: Value returned when both sources are empty.

        Returns:
            The first non-empty value found, otherwise ``default``.
        """
        from_env = os.getenv(env_var)
        if from_env:
            return from_env
        from_file = self.get(config_key)
        if from_file:
            return from_file
        if not default:
            logger.warning(
                f"{env_var} is not set and '{config_key}' is missing from the configuration"
            )
        return default

    @property
    def gemini_model(self) -> str:
        """Get Gemini model name (optional for RAG system)."""
        return self.get('gemini_model', 'models/gemini-2.5-flash')

    @property
    def google_api_key(self) -> str:
        """Get Google API key from environment, or ``""`` when it is not set."""
        # The key is optional, so its absence is only a warning, not an error.
        return self.get_env_var('GOOGLE_API_KEY', required=False) or ""

    # RAG System Properties
    @property
    def rag_config(self) -> Dict[str, Any]:
        """Get RAG system configuration, combining rag_system and models sections.

        Keys are merged in the order ``rag_system``, ``models``,
        ``performance``; later sections override earlier ones. The result is
        a new dictionary on every access.
        """
        merged = self._section('rag_system')
        merged.update(self._section('models'))
        merged.update(self._section('performance'))
        return merged

    @property
    def groq_api_key(self) -> str:
        """Get Groq API key from environment or config (``""`` if absent)."""
        return self._resolve_setting('GROQ_API_KEY', 'groq_api_key')

    @property
    def groq_url(self) -> str:
        """Get Groq URL from environment or config."""
        return self._resolve_setting('GROQ_URL', 'groq_url', self._DEFAULT_GROQ_URL)

    @property
    def siliconflow_api_key(self) -> str:
        """Get Silicon Flow API key from environment or config (``""`` if absent)."""
        return self._resolve_setting('SILICONFLOW_API_KEY', 'siliconflow_api_key')

    @property
    def qdrant_url(self) -> str:
        """Get Qdrant URL from environment or config."""
        return self._resolve_setting('QDRANT_URL', 'qdrant_url', self._DEFAULT_QDRANT_URL)

    @property
    def qdrant_api_key(self) -> str:
        """Get Qdrant API key from environment or config (``""`` if absent)."""
        return self._resolve_setting('QDRANT_API_KEY', 'qdrant_api_key')

    @property
    def document_processing_config(self) -> Dict[str, Any]:
        """Get document processing configuration (``{}`` when absent or null)."""
        return self.get('document_processing', {}) or {}

    @property
    def storage_config(self) -> Dict[str, Any]:
        """Get storage configuration.

        Merges the ``vector_store``, ``cache`` and ``storage`` sections, in
        that order, into a new dictionary; later sections override earlier
        ones and absent or null sections are skipped.
        """
        merged: Dict[str, Any] = {}
        for section_name in ('vector_store', 'cache', 'storage'):
            merged.update(self._section(section_name))
        return merged
# Manual smoke test: build a Config from the default path and print a summary.
if __name__ == "__main__":
    try:
        config = Config()
        print("✅ Configuration loaded successfully!")

        # Each value is read immediately before it is printed so that a
        # failure part-way through still shows the lines produced so far.
        rag_keys = list(config.rag_config.keys())
        print(f"RAG Config keys: {rag_keys}")

        groq_status = 'Yes' if config.groq_api_key else 'No'
        print(f"Has Groq API key: {groq_status}")

        siliconflow_status = 'Yes' if config.siliconflow_api_key else 'No'
        print(f"Has SiliconFlow API key: {siliconflow_status}")

        print(f"Qdrant URL: {config.qdrant_url}")
    except Exception as exc:
        print(f"❌ Configuration failed: {exc}")