yashgori20 committed
Commit 2ac1fd8 · Parent: 16bd62f
Files changed (2):
  1. modules/keywords.py +113 -10
  2. simple_pdf_generator.py +17 -25
modules/keywords.py CHANGED
@@ -32,10 +32,12 @@ class KeywordsModule:
 
         # RapidAPI endpoints
         self.enrichment_api_host = "google-keyword-insight1.p.rapidapi.com"
+        self.similarweb_url = "https://similarweb-traffic.p.rapidapi.com/traffic"
 
         # API priority order (tries in this order)
         self.api_sources = [
-            {'name': 'GoogleInsight', 'available': bool(self.rapidapi_key)},  # Primary: Google Keyword Insight
+            {'name': 'SimilarWeb', 'available': bool(self.rapidapi_key)},     # Primary: SimilarWeb Traffic
+            {'name': 'GoogleInsight', 'available': bool(self.rapidapi_key)},  # Fallback: Google Keyword Insight
         ]
 
         # Performance Configuration
@@ -80,8 +82,11 @@ class KeywordsModule:
         # Try multiple API sources in order of preference
         main_domain_data = self._fetch_domain_keywords_multi_api(domain, quick_scan)
         if not main_domain_data['success']:
-            print("All keyword APIs failed - using mock data")
-            return self._generate_mock_keywords_data(domain, competitor_domains)
+            return ModuleResult(
+                success=False,
+                data={},
+                error="All keyword APIs failed - no real data available"
+            )
 
         # Fetch competitor data
         competitor_data = {}
@@ -125,14 +130,16 @@ class KeywordsModule:
         available_apis = [api for api in self.api_sources if api['available']]
 
         if not available_apis:
-            print("No keyword APIs configured - using mock data")
-            return {'success': True, 'data': self._generate_mock_domain_data(domain)}
+            print("No keyword APIs configured")
+            return {'success': False, 'error': 'No RAPIDAPI_KEY configured'}
 
         for api_source in available_apis:
             try:
                 print(f"Trying {api_source['name']} for keyword data...")
 
-                if api_source['name'] == 'GoogleInsight':
+                if api_source['name'] == 'SimilarWeb':
+                    result = self._fetch_domain_keywords_similarweb(domain, quick_scan)
+                elif api_source['name'] == 'GoogleInsight':
                     result = self._fetch_keywords_enrichment_only(domain, quick_scan)
                 else:
                     continue
@@ -147,8 +154,8 @@ class KeywordsModule:
                 print(f"{api_source['name']} failed: {str(e)}")
                 continue
 
-        print("All APIs failed, using mock data with real volumes if possible")
-        return {'success': True, 'data': self._generate_mock_domain_data(domain)}
+        print("All APIs failed")
+        return {'success': False, 'error': 'All keyword APIs failed'}
 
 
     def _calculate_domain_statistics(self, keywords: List[Dict]) -> Dict[str, Any]:
@@ -248,12 +255,14 @@ class KeywordsModule:
 
         # Set data source label based on what was actually used
         if hasattr(self, '_current_api_source'):
-            if self._current_api_source == 'GoogleInsight':
+            if self._current_api_source == 'SimilarWeb':
+                data_source = 'SimilarWeb Traffic API'
+            elif self._current_api_source == 'GoogleInsight':
                 data_source = 'Google Keyword Insight API (rankings estimated)'
             else:
                 data_source = f'{self._current_api_source} API'
         else:
-            data_source = 'Mock data (APIs unavailable)'
+            data_source = 'Real API data unavailable'
 
         return {
             'totals': totals,
@@ -724,3 +733,97 @@ class KeywordsModule:
             }
         }
 
+    def _fetch_domain_keywords_similarweb(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
+        """Fetch keyword data from SimilarWeb Traffic API"""
+        try:
+            headers = {
+                'x-rapidapi-key': self.rapidapi_key,
+                'x-rapidapi-host': 'similarweb-traffic.p.rapidapi.com'
+            }
+
+            params = {'domain': domain}
+
+            response = requests.get(self.similarweb_url, headers=headers, params=params, timeout=self.timeout)
+
+            if response.status_code == 429:
+                print("SimilarWeb API quota exceeded")
+                raise Exception("Quota exceeded")
+            elif response.status_code == 403:
+                print("SimilarWeb API subscription required")
+                raise Exception("Not subscribed to SimilarWeb API")
+            elif response.status_code != 200:
+                print(f"SimilarWeb API error {response.status_code}: {response.text}")
+                raise Exception(f"API error {response.status_code}")
+
+            data = response.json()
+
+            # Extract top keywords from SimilarWeb response
+            top_keywords = data.get('TopKeywords', [])
+            if not top_keywords:
+                raise Exception("No keywords found in SimilarWeb response")
+
+            # Transform SimilarWeb data to our format
+            keywords = []
+            for i, kw_data in enumerate(top_keywords[:20]):  # Limit to top 20
+                keyword = kw_data.get('Name', '')
+                volume = kw_data.get('Volume', 0)
+                estimated_value = kw_data.get('EstimatedValue', 0)
+
+                # Estimate ranking from list position: the domain's top
+                # keywords are likely ones it already ranks well for
+                estimated_rank = min(i + 1, 10) if i < 10 else min(i + 5, 50)
+
+                # Calculate estimated traffic from the estimated value
+                estimated_traffic = int(estimated_value / 10) if estimated_value else 0
+
+                keywords.append({
+                    'keyword': keyword,
+                    'rank': estimated_rank,
+                    'avg_search_volume': volume,
+                    'estimated_traffic_volume': estimated_traffic,
+                    'estimated_value': estimated_value
+                })
+
+            # Calculate domain statistics based on SimilarWeb data
+            total_keywords = len(keywords)
+            top3 = sum(1 for k in keywords if k['rank'] <= 3)
+            top10 = sum(1 for k in keywords if k['rank'] <= 10)
+            top50 = sum(1 for k in keywords if k['rank'] <= 50)
+
+            # Get additional traffic metrics from SimilarWeb
+            engagements = data.get('Engagements', {})
+            visits = int(engagements.get('Visits', 0))
+
+            stats = {
+                'organic': {
+                    'keywords_in_pos_1': sum(1 for k in keywords if k['rank'] == 1),
+                    'keywords_in_pos_2_3': sum(1 for k in keywords if 2 <= k['rank'] <= 3),
+                    'keywords_in_pos_4_10': sum(1 for k in keywords if 4 <= k['rank'] <= 10),
+                    'keywords_in_pos_11_20': sum(1 for k in keywords if 11 <= k['rank'] <= 20),
+                    'keywords_in_pos_21_50': sum(1 for k in keywords if 21 <= k['rank'] <= 50),
+                    'total_keywords_count': total_keywords,
+                    'Estimated_traffic_volume': sum(k['estimated_traffic_volume'] for k in keywords),
+                    'is_new': 0,  # SimilarWeb doesn't provide historical comparison
+                    'is_up': 0,
+                    'is_down': 0,
+                    'is_lost': 0
+                }
+            }
+
+            return {
+                'success': True,
+                'data': {
+                    'domain': domain,
+                    'statistics': stats,
+                    'keywords': keywords,
+                    'traffic_data': {
+                        'monthly_visits': visits,
+                        'global_rank': data.get('GlobalRank', {}).get('Rank', 0),
+                        'bounce_rate': engagements.get('BounceRate', 0)
+                    }
+                }
+            }
+
+        except Exception as e:
+            return {'success': False, 'error': str(e)}
+
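For reference, a minimal sketch of the response shape _fetch_domain_keywords_similarweb assumes from the SimilarWeb Traffic endpoint. The keys (TopKeywords, Name, Volume, EstimatedValue, Engagements, GlobalRank) come from the accessors in the hunk above; the sample values are invented for illustration. Note the rank heuristic is driven purely by a keyword's position in TopKeywords, so ranks are estimates, not measured SERP positions:

# Illustrative SimilarWeb Traffic payload (hypothetical values; keys mirror
# the accessors used in _fetch_domain_keywords_similarweb above).
sample = {
    'TopKeywords': [
        {'Name': 'crm software', 'Volume': 12000, 'EstimatedValue': 540.0},
        {'Name': 'sales pipeline tool', 'Volume': 3400, 'EstimatedValue': 120.0},
    ],
    'Engagements': {'Visits': 250000, 'BounceRate': 0.42},
    'GlobalRank': {'Rank': 10543},
}

# Position-based rank heuristic from the new method: indices 0-9 map to
# ranks 1-10; later entries are shifted down by 5 and capped at rank 50.
for i, kw in enumerate(sample['TopKeywords'][:20]):
    estimated_rank = min(i + 1, 10) if i < 10 else min(i + 5, 50)
    estimated_traffic = int(kw['EstimatedValue'] / 10) if kw['EstimatedValue'] else 0
    print(kw['Name'], estimated_rank, estimated_traffic)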
simple_pdf_generator.py CHANGED
@@ -7,36 +7,28 @@ import io
 import re
 from typing import Dict, Any
 
+# Try to import all PDF dependencies at module level
+try:
+    from reportlab.pdfgen import canvas
+    from reportlab.lib.pagesizes import letter, A4
+    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
+    from reportlab.lib.units import inch
+    from reportlab.lib.colors import Color, black, blue, green, red
+    from bs4 import BeautifulSoup
+    PDF_AVAILABLE = True
+except ImportError as e:
+    PDF_AVAILABLE = False
+    PDF_ERROR = str(e)
+
 class SimplePDFGenerator:
     def __init__(self):
-        self.available = False
-        try:
-            import reportlab
-            import bs4
-            self.available = True
-        except ImportError:
-            self.available = False
+        self.available = PDF_AVAILABLE
 
     def generate_pdf(self, html_content: str) -> bytes:
         if not self.available:
-            raise ImportError("PDF generation requires reportlab: pip install reportlab")
-
-        try:
-            from reportlab.pdfgen import canvas
-            from reportlab.lib.pagesizes import letter, A4
-            from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
-            from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
-            from reportlab.lib.units import inch
-            from reportlab.lib.colors import Color, black, blue, green, red
-        except ImportError as e:
-            raise ImportError(f"PDF generation requires reportlab components: {e}")
-
-        try:
-            from bs4 import BeautifulSoup
-        except ImportError:
-            raise ImportError("PDF generation requires beautifulsoup4: pip install beautifulsoup4")
-
-        import re
+            error_msg = PDF_ERROR if 'PDF_ERROR' in globals() else "PDF generation requires reportlab and beautifulsoup4"
+            raise ImportError(error_msg)
 
         # Parse HTML and extract content
         soup = BeautifulSoup(html_content, 'html.parser')
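A quick usage sketch under the new module-level import scheme (the caller and HTML string here are hypothetical). Availability is now decided once at import time, so a missing dependency surfaces as available == False rather than an ImportError deep inside a report request:

# Hypothetical caller for SimplePDFGenerator after this change.
from simple_pdf_generator import SimplePDFGenerator

gen = SimplePDFGenerator()
if gen.available:
    pdf_bytes = gen.generate_pdf("<html><body><h1>SEO Report</h1></body></html>")
    with open("report.pdf", "wb") as f:
        f.write(pdf_bytes)
else:
    print("PDF export disabled: install reportlab and beautifulsoup4")

Hoisting the imports also makes BeautifulSoup visible at module scope, which generate_pdf relies on now that its local import block is gone.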