Commit 2ac1fd8 · Parent: 16bd62f · commit message: "ok"

Files changed:
- modules/keywords.py (+113, -10)
- simple_pdf_generator.py (+17, -25)
modules/keywords.py CHANGED

(Deleted lines that the source diff view truncates are shown below ending in "…".)

@@ -32,10 +32,12 @@ class KeywordsModule:
 
         # RapidAPI endpoints
         self.enrichment_api_host = "google-keyword-insight1.p.rapidapi.com"
+        self.similarweb_url = "https://similarweb-traffic.p.rapidapi.com/traffic"
 
         # API priority order (tries in this order)
        self.api_sources = [
-            {'name': '…
+            {'name': 'SimilarWeb', 'available': bool(self.rapidapi_key)},     # Primary: SimilarWeb Traffic
+            {'name': 'GoogleInsight', 'available': bool(self.rapidapi_key)},  # Fallback: Google Keyword Insight
         ]
 
         # Performance Configuration
@@ -80,8 +82,11 @@ class KeywordsModule:
         # Try multiple API sources in order of preference
         main_domain_data = self._fetch_domain_keywords_multi_api(domain, quick_scan)
         if not main_domain_data['success']:
-            …
-            …
+            return ModuleResult(
+                success=False,
+                data={},
+                error="All keyword APIs failed - no real data available"
+            )
 
         # Fetch competitor data
         competitor_data = {}
@@ -125,14 +130,16 @@ class KeywordsModule:
         available_apis = [api for api in self.api_sources if api['available']]
 
         if not available_apis:
-            print("No keyword APIs configured…
-            return {'success':…
+            print("No keyword APIs configured")
+            return {'success': False, 'error': 'No RAPIDAPI_KEY configured'}
 
         for api_source in available_apis:
             try:
                 print(f"Trying {api_source['name']} for keyword data...")
 
-                if api_source['name'] == '…
+                if api_source['name'] == 'SimilarWeb':
+                    result = self._fetch_domain_keywords_similarweb(domain, quick_scan)
+                elif api_source['name'] == 'GoogleInsight':
                     result = self._fetch_keywords_enrichment_only(domain, quick_scan)
                 else:
                     continue
@@ -147,8 +154,8 @@ class KeywordsModule:
                 print(f"{api_source['name']} failed: {str(e)}")
                 continue
 
-        print("All APIs failed…
-        return {'success':…
+        print("All APIs failed")
+        return {'success': False, 'error': 'All keyword APIs failed'}
 
 
     def _calculate_domain_statistics(self, keywords: List[Dict]) -> Dict[str, Any]:
@@ -248,12 +255,14 @@ class KeywordsModule:
 
         # Set data source label based on what was actually used
         if hasattr(self, '_current_api_source'):
-            if self._current_api_source == '…
+            if self._current_api_source == 'SimilarWeb':
+                data_source = 'SimilarWeb Traffic API'
+            elif self._current_api_source == 'GoogleInsight':
                 data_source = 'Google Keyword Insight API (rankings estimated)'
             else:
                 data_source = f'{self._current_api_source} API'
         else:
-            data_source = '…
+            data_source = 'Real API data unavailable'
 
         return {
             'totals': totals,
@@ -724,3 +733,97 @@ class KeywordsModule:
             }
         }
 
+    def _fetch_domain_keywords_similarweb(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
+        """Fetch keyword data from SimilarWeb Traffic API"""
+        try:
+            headers = {
+                'x-rapidapi-key': self.rapidapi_key,
+                'x-rapidapi-host': 'similarweb-traffic.p.rapidapi.com'
+            }
+
+            params = {'domain': domain}
+
+            response = requests.get(self.similarweb_url, headers=headers, params=params, timeout=self.timeout)
+
+            if response.status_code == 429:
+                print("SimilarWeb API quota exceeded")
+                raise Exception("Quota exceeded")
+            elif response.status_code == 403:
+                print("SimilarWeb API subscription required")
+                raise Exception("Not subscribed to SimilarWeb API")
+            elif response.status_code != 200:
+                print(f"SimilarWeb API error {response.status_code}: {response.text}")
+                raise Exception(f"API error {response.status_code}")
+
+            data = response.json()
+
+            # Extract top keywords from SimilarWeb response
+            top_keywords = data.get('TopKeywords', [])
+            if not top_keywords:
+                raise Exception("No keywords found in SimilarWeb response")
+
+            # Transform SimilarWeb data to our format
+            keywords = []
+            for i, kw_data in enumerate(top_keywords[:20]):  # Limit to top 20
+                keyword = kw_data.get('Name', '')
+                volume = kw_data.get('Volume', 0)
+                estimated_value = kw_data.get('EstimatedValue', 0)
+
+                # Estimate ranking based on estimated value (higher value = better ranking)
+                # Top keywords are likely ranking well for the domain
+                estimated_rank = min(i + 1, 10) if i < 10 else min(i + 5, 50)
+
+                # Calculate estimated traffic from the estimated value
+                estimated_traffic = int(estimated_value / 10) if estimated_value else 0
+
+                keywords.append({
+                    'keyword': keyword,
+                    'rank': estimated_rank,
+                    'avg_search_volume': volume,
+                    'estimated_traffic_volume': estimated_traffic,
+                    'estimated_value': estimated_value
+                })
+
+            # Calculate domain statistics based on SimilarWeb data
+            total_keywords = len(keywords)
+            top3 = sum(1 for k in keywords if k['rank'] <= 3)
+            top10 = sum(1 for k in keywords if k['rank'] <= 10)
+            top50 = sum(1 for k in keywords if k['rank'] <= 50)
+
+            # Get additional traffic metrics from SimilarWeb
+            engagements = data.get('Engagements', {})
+            visits = int(engagements.get('Visits', 0))
+
+            stats = {
+                'organic': {
+                    'keywords_in_pos_1': sum(1 for k in keywords if k['rank'] == 1),
+                    'keywords_in_pos_2_3': sum(1 for k in keywords if 2 <= k['rank'] <= 3),
+                    'keywords_in_pos_4_10': sum(1 for k in keywords if 4 <= k['rank'] <= 10),
+                    'keywords_in_pos_11_20': sum(1 for k in keywords if 11 <= k['rank'] <= 20),
+                    'keywords_in_pos_21_50': sum(1 for k in keywords if 21 <= k['rank'] <= 50),
+                    'total_keywords_count': total_keywords,
+                    'Estimated_traffic_volume': sum(k['estimated_traffic_volume'] for k in keywords),
+                    'is_new': 0,  # SimilarWeb doesn't provide historical comparison
+                    'is_up': 0,
+                    'is_down': 0,
+                    'is_lost': 0
+                }
+            }
+
+            return {
+                'success': True,
+                'data': {
+                    'domain': domain,
+                    'statistics': stats,
+                    'keywords': keywords,
+                    'traffic_data': {
+                        'monthly_visits': visits,
+                        'global_rank': data.get('GlobalRank', {}).get('Rank', 0),
+                        'bounce_rate': engagements.get('BounceRate', 0)
+                    }
+                }
+            }
+
+        except Exception as e:
+            return {'success': False, 'error': str(e)}
+
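To make the new transformation easier to follow, here is a minimal standalone sketch of the reshaping that `_fetch_domain_keywords_similarweb` performs, run against a fabricated payload. The field names (`TopKeywords`, `Name`, `Volume`, `EstimatedValue`) mirror the ones the diff reads; the sample values are invented for illustration only.

```python
# Fabricated sample shaped like the fields the new method reads from
# the SimilarWeb Traffic response; real responses carry more fields.
sample = {
    'TopKeywords': [
        {'Name': 'seo audit tool', 'Volume': 12000, 'EstimatedValue': 450},
        {'Name': 'keyword research', 'Volume': 8000, 'EstimatedValue': 220},
    ],
}

keywords = []
for i, kw_data in enumerate(sample['TopKeywords'][:20]):
    estimated_value = kw_data.get('EstimatedValue', 0)
    keywords.append({
        'keyword': kw_data.get('Name', ''),
        # Rank is estimated from list position, mirroring the diff's heuristic:
        # capped at 10 for the first ten entries, at 50 beyond that.
        'rank': min(i + 1, 10) if i < 10 else min(i + 5, 50),
        'avg_search_volume': kw_data.get('Volume', 0),
        # Traffic is derived from EstimatedValue / 10, as in the diff.
        'estimated_traffic_volume': int(estimated_value / 10) if estimated_value else 0,
        'estimated_value': estimated_value,
    })

print(keywords[0])
# {'keyword': 'seo audit tool', 'rank': 1, 'avg_search_volume': 12000,
#  'estimated_traffic_volume': 45, 'estimated_value': 450}
```

Both `rank` and `estimated_traffic_volume` are therefore heuristics rather than measured values, which is worth keeping in mind when reading the position buckets in the `stats` dict above.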
simple_pdf_generator.py CHANGED

@@ -7,36 +7,28 @@ import io
 import re
 from typing import Dict, Any
 
+# Try to import all PDF dependencies at module level
+try:
+    from reportlab.pdfgen import canvas
+    from reportlab.lib.pagesizes import letter, A4
+    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
+    from reportlab.lib.units import inch
+    from reportlab.lib.colors import Color, black, blue, green, red
+    from bs4 import BeautifulSoup
+    PDF_AVAILABLE = True
+except ImportError as e:
+    PDF_AVAILABLE = False
+    PDF_ERROR = str(e)
+
 class SimplePDFGenerator:
     def __init__(self):
-        self.available = …
-        try:
-            import reportlab
-            import bs4
-            self.available = True
-        except ImportError:
-            self.available = False
+        self.available = PDF_AVAILABLE
 
     def generate_pdf(self, html_content: str) -> bytes:
         if not self.available:
-            …
-            …
-        try:
-            from reportlab.pdfgen import canvas
-            from reportlab.lib.pagesizes import letter, A4
-            from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
-            from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
-            from reportlab.lib.units import inch
-            from reportlab.lib.colors import Color, black, blue, green, red
-        except ImportError as e:
-            raise ImportError(f"PDF generation requires reportlab components: {e}")
-
-        try:
-            from bs4 import BeautifulSoup
-        except ImportError:
-            raise ImportError("PDF generation requires beautifulsoup4: pip install beautifulsoup4")
-
-        import re
+            error_msg = PDF_ERROR if 'PDF_ERROR' in globals() else "PDF generation requires reportlab and beautifulsoup4"
+            raise ImportError(error_msg)
 
         # Parse HTML and extract content
         soup = BeautifulSoup(html_content, 'html.parser')