Spaces:
Sleeping
Sleeping
Commit ·
795900a
1
Parent(s): 7da164e
ok
Browse files
app.py
CHANGED
|
@@ -4,6 +4,8 @@ import validators
|
|
| 4 |
import os
|
| 5 |
import tempfile
|
| 6 |
import uuid
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Import SEO modules
|
| 9 |
from modules.technical_seo import TechnicalSEOModule
|
|
@@ -29,6 +31,77 @@ llm_recommendations = LLMRecommendations()
|
|
| 29 |
# Store for generated reports (in production, use database)
|
| 30 |
reports_store = {}
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
@app.route('/')
|
| 33 |
def index():
|
| 34 |
return render_template('index.html')
|
|
@@ -49,12 +122,16 @@ def generate_report():
|
|
| 49 |
# Generate unique report ID
|
| 50 |
report_id = str(uuid.uuid4())
|
| 51 |
|
| 52 |
-
# Validate competitor URLs
|
|
|
|
| 53 |
competitor_list = []
|
| 54 |
for comp in competitors:
|
| 55 |
comp = comp.strip()
|
| 56 |
if comp and validators.url(comp):
|
| 57 |
competitor_list.append(comp)
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# Technical SEO Analysis
|
| 60 |
technical_data = technical_module.analyze(url)
|
|
@@ -62,53 +139,64 @@ def generate_report():
|
|
| 62 |
# Content Audit
|
| 63 |
content_data = content_module.analyze(url)
|
| 64 |
|
| 65 |
-
# Keywords Analysis
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
'
|
| 92 |
-
'estimated_organic_traffic': 0,
|
| 93 |
-
'organic_keywords': 0,
|
| 94 |
-
'data_sources': ['API disabled to save credits'],
|
| 95 |
-
'placeholder': True,
|
| 96 |
-
'message': 'Backlinks analysis temporarily disabled to conserve API credits.'
|
| 97 |
-
}
|
| 98 |
|
| 99 |
# Generate LLM Recommendations
|
| 100 |
llm_rec_data = llm_recommendations.generate_recommendations(
|
| 101 |
url, technical_data, content_data, keywords_data, backlinks_data
|
| 102 |
)
|
| 103 |
|
| 104 |
-
# Competitor Analysis
|
| 105 |
competitor_data = []
|
| 106 |
for comp_url in competitor_list:
|
| 107 |
comp_technical = technical_module.analyze(comp_url)
|
| 108 |
comp_content = content_module.analyze(comp_url, quick_scan=True)
|
| 109 |
-
|
| 110 |
-
#
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
competitor_data.append({
|
| 113 |
'url': comp_url,
|
| 114 |
'technical': comp_technical,
|
|
|
|
| 4 |
import os
|
| 5 |
import tempfile
|
| 6 |
import uuid
|
| 7 |
+
from urllib.parse import urlparse
|
| 8 |
+
from typing import Dict, Any
|
| 9 |
|
| 10 |
# Import SEO modules
|
| 11 |
from modules.technical_seo import TechnicalSEOModule
|
|
|
|
| 31 |
# Store for generated reports (in production, use database)
|
| 32 |
reports_store = {}
|
| 33 |
|
| 34 |
+
def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
|
| 35 |
+
"""Transform new keywords data structure to match report generator expectations"""
|
| 36 |
+
if not new_data or new_data.get('placeholder'):
|
| 37 |
+
return {
|
| 38 |
+
'placeholder': True,
|
| 39 |
+
'message': 'No keyword data available',
|
| 40 |
+
'total_keywords': 0,
|
| 41 |
+
'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
|
| 42 |
+
'best_keywords': [],
|
| 43 |
+
'opportunity_keywords': [],
|
| 44 |
+
'data_source': 'Analysis failed'
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
# Transform new structure to old structure
|
| 48 |
+
totals = new_data.get('totals', {})
|
| 49 |
+
distribution = new_data.get('distribution', {})
|
| 50 |
+
movement = new_data.get('movement', {})
|
| 51 |
+
best_keywords = new_data.get('best_keywords', [])
|
| 52 |
+
declining_keywords = new_data.get('declining_keywords', [])
|
| 53 |
+
opportunities = new_data.get('opportunities', [])
|
| 54 |
+
data_sources = new_data.get('data_sources', {})
|
| 55 |
+
|
| 56 |
+
# Transform position distribution
|
| 57 |
+
pos_dist = {
|
| 58 |
+
'top_3': distribution.get('top3', 0),
|
| 59 |
+
'top_10': distribution.get('top10', 0),
|
| 60 |
+
'top_50': distribution.get('top50', 0),
|
| 61 |
+
'beyond_50': totals.get('keywords', 0) - distribution.get('top50', 0)
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
# Transform best keywords to match expected format
|
| 65 |
+
transformed_best_keywords = []
|
| 66 |
+
for kw in best_keywords:
|
| 67 |
+
transformed_best_keywords.append({
|
| 68 |
+
'keyword': kw.get('keyword', ''),
|
| 69 |
+
'position': kw.get('rank', 0),
|
| 70 |
+
'clicks': 0, # Not available in new API
|
| 71 |
+
'impressions': kw.get('volume', 0),
|
| 72 |
+
'url': kw.get('url', ''),
|
| 73 |
+
'estimated_traffic': kw.get('estimated_traffic', 0),
|
| 74 |
+
'trend': kw.get('trend', 'stable')
|
| 75 |
+
})
|
| 76 |
+
|
| 77 |
+
# Transform opportunities to match expected format
|
| 78 |
+
transformed_opportunities = []
|
| 79 |
+
for opp in opportunities:
|
| 80 |
+
transformed_opportunities.append({
|
| 81 |
+
'keyword': opp.get('keyword', ''),
|
| 82 |
+
'position': 0, # Not applicable for opportunities
|
| 83 |
+
'impressions': opp.get('volume', 0),
|
| 84 |
+
'ctr': 0, # Not available
|
| 85 |
+
'competitor_rank': opp.get('competitor_rank', 0),
|
| 86 |
+
'priority_score': opp.get('priority_score', 0),
|
| 87 |
+
'competitor_domain': opp.get('competitor_domain', '')
|
| 88 |
+
})
|
| 89 |
+
|
| 90 |
+
return {
|
| 91 |
+
'total_keywords': totals.get('keywords', 0),
|
| 92 |
+
'estimated_traffic': totals.get('estimated_traffic', 0),
|
| 93 |
+
'position_distribution': pos_dist,
|
| 94 |
+
'movement': movement,
|
| 95 |
+
'best_keywords': transformed_best_keywords,
|
| 96 |
+
'declining_keywords': declining_keywords,
|
| 97 |
+
'opportunity_keywords': transformed_opportunities,
|
| 98 |
+
'competitor_summary': new_data.get('competitor_summary', []),
|
| 99 |
+
'data_source': f"{data_sources.get('positions', 'Unknown')} + {data_sources.get('volume', 'Unknown')}",
|
| 100 |
+
'enrichment_rate': data_sources.get('enrichment_rate', 0),
|
| 101 |
+
'meta': new_data.get('meta', {}),
|
| 102 |
+
'placeholder': False
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
@app.route('/')
|
| 106 |
def index():
|
| 107 |
return render_template('index.html')
|
|
|
|
| 122 |
# Generate unique report ID
|
| 123 |
report_id = str(uuid.uuid4())
|
| 124 |
|
| 125 |
+
# Validate competitor URLs and extract domains
|
| 126 |
+
competitor_domains = []
|
| 127 |
competitor_list = []
|
| 128 |
for comp in competitors:
|
| 129 |
comp = comp.strip()
|
| 130 |
if comp and validators.url(comp):
|
| 131 |
competitor_list.append(comp)
|
| 132 |
+
# Extract domain from competitor URL
|
| 133 |
+
domain = urlparse(comp).netloc.replace('www.', '')
|
| 134 |
+
competitor_domains.append(domain)
|
| 135 |
|
| 136 |
# Technical SEO Analysis
|
| 137 |
technical_data = technical_module.analyze(url)
|
|
|
|
| 139 |
# Content Audit
|
| 140 |
content_data = content_module.analyze(url)
|
| 141 |
|
| 142 |
+
# Keywords Analysis - UPDATED: Pass competitor domains and handle errors
|
| 143 |
+
keywords_result = keywords_module.analyze(url, competitor_domains=competitor_domains)
|
| 144 |
+
if not keywords_result.success:
|
| 145 |
+
# Fallback to placeholder data if keywords analysis fails
|
| 146 |
+
keywords_data = {
|
| 147 |
+
'placeholder': True,
|
| 148 |
+
'message': f'Keywords analysis failed: {keywords_result.error}',
|
| 149 |
+
'total_keywords': 0,
|
| 150 |
+
'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
|
| 151 |
+
'best_keywords': [],
|
| 152 |
+
'opportunity_keywords': [],
|
| 153 |
+
'data_source': 'Analysis failed'
|
| 154 |
+
}
|
| 155 |
+
else:
|
| 156 |
+
# Transform new data structure to match report generator expectations
|
| 157 |
+
keywords_data = _transform_keywords_data(keywords_result.data)
|
| 158 |
+
|
| 159 |
+
# Backlinks Analysis - UNCOMMENTED: Enable backlinks analysis
|
| 160 |
+
print(f"DEBUG: Starting backlinks analysis for {url}")
|
| 161 |
+
backlinks_result = backlinks_module.analyze(url)
|
| 162 |
+
backlinks_data = backlinks_result.data
|
| 163 |
+
print(f"DEBUG: Backlinks analysis result - Success: {backlinks_result.success}")
|
| 164 |
+
print(f"DEBUG: Backlinks data keys: {list(backlinks_data.keys())}")
|
| 165 |
+
if backlinks_data.get('total_backlinks'):
|
| 166 |
+
print(f"DEBUG: Total backlinks found: {backlinks_data.get('total_backlinks')}")
|
| 167 |
+
if backlinks_data.get('placeholder'):
|
| 168 |
+
print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
# Generate LLM Recommendations
|
| 171 |
llm_rec_data = llm_recommendations.generate_recommendations(
|
| 172 |
url, technical_data, content_data, keywords_data, backlinks_data
|
| 173 |
)
|
| 174 |
|
| 175 |
+
# Competitor Analysis - UPDATED: Pass competitor domains
|
| 176 |
competitor_data = []
|
| 177 |
for comp_url in competitor_list:
|
| 178 |
comp_technical = technical_module.analyze(comp_url)
|
| 179 |
comp_content = content_module.analyze(comp_url, quick_scan=True)
|
| 180 |
+
|
| 181 |
+
# Keywords analysis for competitor (no competitors for competitor analysis)
|
| 182 |
+
comp_keywords_result = keywords_module.analyze(comp_url, competitor_domains=[], quick_scan=True)
|
| 183 |
+
if comp_keywords_result.success:
|
| 184 |
+
comp_keywords = _transform_keywords_data(comp_keywords_result.data)
|
| 185 |
+
else:
|
| 186 |
+
comp_keywords = {
|
| 187 |
+
'placeholder': True,
|
| 188 |
+
'message': f'Keywords analysis failed: {comp_keywords_result.error}',
|
| 189 |
+
'total_keywords': 0,
|
| 190 |
+
'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
|
| 191 |
+
'best_keywords': [],
|
| 192 |
+
'opportunity_keywords': [],
|
| 193 |
+
'data_source': 'Analysis failed'
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
# Backlinks analysis for competitor - UNCOMMENTED
|
| 197 |
+
comp_backlinks_result = backlinks_module.analyze(comp_url, quick_scan=True)
|
| 198 |
+
comp_backlinks = comp_backlinks_result.data
|
| 199 |
+
|
| 200 |
competitor_data.append({
|
| 201 |
'url': comp_url,
|
| 202 |
'technical': comp_technical,
|