Spaces:
Sleeping
Sleeping
| import os | |
| import yaml | |
| from pathlib import Path | |
| from typing import Dict, Any | |
| import logging | |
| import sys | |
| sys.path.append(os.path.dirname(os.path.abspath(__file__))) | |
| from .utilites import load_environment_variables, validate_api_keys | |
# Resolve the module-level logger. The project logger is imported at this
# point (not with the imports above) to avoid a circular import.
try:
    from logger.custom_logger import CustomLoggerTracker

    # Both calls stay inside the ``try`` so an ImportError raised while the
    # tracker is being built also lands in the fallback branch.
    custom_log = CustomLoggerTracker()
    logger = custom_log.get_logger("config")
except ImportError:
    # Project logger unavailable: use a standard-library logger with the
    # same name instead.
    logger = logging.getLogger("config")
class Config:
    """Application configuration backed by a YAML file and environment variables.

    The YAML document is loaded once in ``__init__`` and kept in
    ``self.config``. Credentials and service URLs are never embedded in the
    source: each accessor reads the environment variable first, then the
    matching top-level key of the YAML file, then a non-secret default
    (an empty string for credentials).
    """

    def __init__(self, config_path: str = "config.yaml"):
        """Load and validate the configuration file at *config_path*.

        Raises:
            FileNotFoundError: if *config_path* does not exist.
            yaml.YAMLError: if the file is not valid YAML.
            ValueError: if the document is not a mapping or misses required keys.
        """
        logger.info("Start Loading data from configs")
        # Project helper; presumably populates os.environ (e.g. from a .env
        # file) before any key is read -- confirm in utilites.
        load_environment_variables()
        self.config_path = Path(config_path)
        self.config = self._load_config()
        self._validate_config()
        # Missing API keys are reported but deliberately not fatal.
        api_validation = validate_api_keys()
        if not api_validation['valid']:
            logger.warning(f"Some API keys missing: {api_validation['missing_required']}")

    def _load_config(self) -> Dict[str, Any]:
        """Read ``self.config_path`` and return its content as a dict.

        An empty YAML file yields an empty dict, so the caller's validation
        reports the missing keys instead of failing on ``None``.
        """
        # Checked outside the try block so the error is logged only once.
        if not self.config_path.exists():
            logger.error(f"Configuration file not found: {self.config_path}")
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")
        try:
            with open(self.config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
        except yaml.YAMLError as e:
            logger.error(f"Error parsing YAML configuration: {e}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error loading configuration: {e}")
            raise
        if config is None:
            # yaml.safe_load returns None for an empty document.
            config = {}
        if not isinstance(config, dict):
            message = (
                f"Configuration root must be a mapping, got {type(config).__name__}: "
                f"{self.config_path}"
            )
            logger.error(message)
            raise ValueError(message)
        logger.info(f"Configuration loaded successfully from {self.config_path}")
        return config

    def _section(self, name: str) -> Dict[str, Any]:
        """Return a shallow copy of the top-level mapping *name*, or ``{}``.

        A section that is absent or null yields an empty dict; a section of
        any other non-mapping type is ignored with a warning.
        """
        section = self.get(name)
        if isinstance(section, dict):
            return dict(section)
        if section is not None:
            logger.warning(f"Configuration section '{name}' is not a mapping; ignoring it")
        return {}

    def _env_or_config(self, env_key: str, config_key: str, default: str = "") -> str:
        """Resolve a setting from the environment, then the YAML file, then *default*.

        Empty values are treated as "not set" at every level.
        """
        value = os.getenv(env_key)
        if value:
            return value
        value = self.get(config_key)
        if value:
            return value
        if not default:
            logger.warning(
                f"No value configured for {env_key}; set the environment variable "
                f"or '{config_key}' in the configuration file"
            )
        return default

    def _validate_config(self) -> None:
        """Validate configuration based on the actual YAML structure.

        Raises:
            ValueError: if neither ``gemini_model`` nor ``models`` is present,
                or if ``models`` lacks ``embedding_model`` / ``llm_model``.
        """
        # Accept either the old structure (gemini_model) or the new one (models).
        has_gemini = 'gemini_model' in self.config
        has_models_section = 'models' in self.config
        if not has_gemini and not has_models_section:
            message = (
                "Missing required configuration: either 'gemini_model' or "
                "'models' section must be configured"
            )
            logger.error(message)
            raise ValueError(message)

        if has_models_section:
            # _section() turns a null/non-mapping section into {}, so the
            # loop reports the missing keys instead of raising TypeError.
            models_config = self._section('models')
            for key in ('embedding_model', 'llm_model'):
                if key not in models_config:
                    logger.error(f"Missing required model configuration: models.{key}")
                    raise ValueError(f"Missing required model configuration: models.{key}")

        # rag_system keys are optional; their absence is only logged.
        if 'rag_system' in self.config:
            rag_config = self._section('rag_system')
            for key in ('chunk_size', 'chunk_overlap', 'max_context_chunks'):
                if key not in rag_config:
                    logger.debug(f"Optional RAG configuration key not found: rag_system.{key}")

        vector_config = self._section('vector_store')
        if vector_config.get('provider') == 'qdrant' and 'collection_name' not in vector_config:
            logger.warning("Qdrant collection_name not specified, will use default")

        logger.info("Configuration validation passed")

    def get(self, key: str, default: Any = None) -> Any:
        """Get configuration value by key, supporting nested keys with dot notation.

        Returns *default* when any path component is missing or when an
        intermediate value cannot be indexed.
        """
        value = self.config
        try:
            for part in key.split('.'):
                value = value[part]
        except (KeyError, TypeError):
            logger.debug(f"Config key '{key}' not found, returning default: {default}")
            return default
        # The value itself is not logged: it may be a credential.
        logger.debug(f"Retrieved config value for '{key}'")
        return value

    def get_env_var(self, key: str, required: bool = True) -> str:
        """Return the environment variable *key*.

        An unset or empty variable raises ``ValueError`` when *required* is
        true; otherwise the raw ``os.getenv`` result is returned, which is
        ``None`` (or an empty string) despite the ``str`` annotation.
        """
        value = os.getenv(key)
        if value:
            logger.info(f"Environment variable '{key}' loaded successfully")
            return value
        if required:
            logger.error(f"Required environment variable not found: {key}")
            raise ValueError(f"Required environment variable not found: {key}")
        logger.warning(f"Optional environment variable '{key}' not found")
        return value

    def gemini_model(self) -> str:
        """Get Gemini model name (optional for RAG system)."""
        return self.get('gemini_model', 'models/gemini-2.5-flash')

    def google_api_key(self) -> str:
        """Get Google API key from environment, or ``""`` when it is not set."""
        # Looked up as optional so a missing key is not logged at ERROR level.
        value = self.get_env_var('GOOGLE_API_KEY', required=False)
        if not value:
            logger.warning("Google API key not found, this is optional for RAG-only usage")
            return ""
        return value

    # RAG System Properties
    def rag_config(self) -> Dict[str, Any]:
        """Get RAG system configuration, combining rag_system and models sections.

        ``performance`` settings are merged last, so on a key collision the
        order of precedence is performance > models > rag_system.
        """
        merged = self._section('rag_system')
        merged.update(self._section('models'))
        merged.update(self._section('performance'))
        return merged

    def groq_api_key(self) -> str:
        """Get Groq API key from ``GROQ_API_KEY`` or config key ``groq_api_key``.

        Returns an empty string when neither is set; no key is embedded here.
        """
        return self._env_or_config('GROQ_API_KEY', 'groq_api_key')

    def groq_url(self) -> str:
        """Get Groq URL from environment or config, else the public endpoint."""
        return self._env_or_config('GROQ_URL', 'groq_url', 'https://api.groq.com/openai/v1')

    def siliconflow_api_key(self) -> str:
        """Get Silicon Flow API key from ``SILICONFLOW_API_KEY`` or config.

        Returns an empty string when neither is set; no key is embedded here.
        """
        return self._env_or_config('SILICONFLOW_API_KEY', 'siliconflow_api_key')

    def qdrant_url(self) -> str:
        """Get Qdrant URL from ``QDRANT_URL`` or config key ``qdrant_url``.

        Returns an empty string when neither is set; the deployment-specific
        cluster endpoint must be supplied by the environment or the file.
        """
        return self._env_or_config('QDRANT_URL', 'qdrant_url')

    def qdrant_api_key(self) -> str:
        """Get Qdrant API key from ``QDRANT_API_KEY`` or config key ``qdrant_api_key``.

        Returns an empty string when neither is set; no key is embedded here.
        """
        return self._env_or_config('QDRANT_API_KEY', 'qdrant_api_key')

    def document_processing_config(self) -> Dict[str, Any]:
        """Get document processing configuration (empty dict when absent)."""
        return self._section('document_processing')

    def storage_config(self) -> Dict[str, Any]:
        """Get storage configuration.

        Merges the ``vector_store``, ``cache`` and ``storage`` sections in
        that order, so later sections win on a key collision.
        """
        storage_config: Dict[str, Any] = {}
        for name in ('vector_store', 'cache', 'storage'):
            storage_config.update(self._section(name))
        return storage_config
# Smoke test: load the default configuration file and report which
# integrations are configured.
if __name__ == "__main__":
    try:
        config = Config()
        print("✅ Configuration loaded successfully!")
        # The accessors are plain methods and must be called. Referencing
        # them bare yields a bound method, which has no .keys() and is
        # always truthy.
        print(f"RAG Config keys: {list(config.rag_config().keys())}")
        print(f"Has Groq API key: {'Yes' if config.groq_api_key() else 'No'}")
        print(f"Has SiliconFlow API key: {'Yes' if config.siliconflow_api_key() else 'No'}")
        print(f"Qdrant URL: {config.qdrant_url()}")
    except Exception as e:
        # Top-level boundary of a manual check: report the failure and exit normally.
        print(f"❌ Configuration failed: {e}")