import json
import logging
import os
import smtplib
from datetime import datetime, date, timedelta
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from zoneinfo import ZoneInfo
import pandas as pd
import requests
from dotenv import load_dotenv
from openpyxl.styles import Font, PatternFill, Alignment

# Setup logging
# Logs go to a file (not stdout) so the script can run unattended (e.g. cron).
logging.basicConfig(
    filename='diavgeia_6_months.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Pull credentials from a local .env file into the process environment.
load_dotenv()

# Email configuration from environment variables
# Sender and recipient fall back to a default address; the password has no
# fallback and is validated immediately below.
EMAIL_CONFIG = {
    "sender": os.getenv("EMAIL_SENDER", "price.monitor.email@gmail.com"),
    "billy": os.getenv("EMAIL_BILLY", "price.monitor.email@gmail.com"),
    "password": os.getenv("EMAIL_PASSWORD")
}

# Validate that required email password is set
# Fail fast at import time: without the password the error-notification email
# could never be sent, so a missing value is a configuration error.
if not EMAIL_CONFIG["password"]:
    logging.error("EMAIL_PASSWORD environment variable is required but not set")
    raise ValueError("EMAIL_PASSWORD environment variable is required but not set")


def send_email_error(project_name, error):
    """Email an error notification to Billy via Gmail SMTP.

    Best-effort: any failure while composing or sending the mail is logged
    and swallowed so it never masks the original error being reported.
    """
    try:
        message = MIMEMultipart()
        message['Subject'] = f"❌ ERROR: {project_name}"
        message['From'] = EMAIL_CONFIG["sender"]
        message['To'] = EMAIL_CONFIG["billy"]

        body = f"""
        <html>
          <body>
            <h2 style="color: red;">Error in {project_name}</h2>
            <p><strong>Time:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
            <p><strong>Error Details:</strong></p>
            <pre style="background-color: #f5f5f5; padding: 10px; border-radius: 5px;">
{str(error)}
            </pre>
            <hr>
            <p>Please check the logs for more details.</p>
          </body>
        </html>
        """
        message.attach(MIMEText(body, 'html'))

        # Implicit-TLS connection; the context manager closes the socket.
        with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
            server.login(EMAIL_CONFIG["sender"], EMAIL_CONFIG["password"])
            server.send_message(message)
            logging.info("Error email sent to Billy")
    except Exception as exc:
        logging.error(f"Failed to send error email: {exc}")


def load_cpv_codes(filepath='CPV.xlsx'):
    """Load CPV codes from the 'CPV' column of an Excel workbook.

    Args:
        filepath: Path to the workbook; must contain a 'CPV' column.

    Returns:
        list[str]: One whitespace-stripped code per row.

    Raises:
        Re-raises any read/parse error after logging it.
    """
    try:
        sheet = pd.read_excel(filepath)
        codes = list(sheet['CPV'].astype(str).str.strip())
    except Exception as exc:
        logging.error(f"Error loading CPV codes: {exc}")
        raise
    else:
        logging.info(f"Loaded {len(codes)} CPV codes from {filepath}")
        return codes


def fetch_decisions(cpv_codes, start_date, end_date):
    """Fetch decisions from the Diavgeia API for given CPV codes and date range.

    Pages through the advanced-search endpoint 100 results at a time until a
    page comes back empty. A failure on any page is logged and stops the loop,
    returning whatever was collected so far (best-effort).

    Args:
        cpv_codes: Iterable of CPV code strings to filter on.
        start_date: Inclusive start date, 'YYYY-MM-DD'.
        end_date: Inclusive end date, 'YYYY-MM-DD'.

    Returns:
        list: Raw decision dicts as returned by the API.
    """
    page_num = 0
    final_data = []

    # Format CPV codes for the API query (quoted, comma-separated list)
    cpv_query = ','.join(f'"{code}"' for code in cpv_codes)

    logging.info(f"Starting scrape for dates {start_date} to {end_date}")

    url = "https://diavgeia.gov.gr/luminapi/opendata/search/advanced.json"

    while True:
        params = {
            "page": page_num,
            "size": 100,
            "q": f'cpv:[{cpv_query}] AND issueDate:[DT({start_date}T00:00:00) TO DT({end_date}T23:59:59)]'
        }

        try:
            # requests has no default timeout; without one a stalled
            # connection would hang this unattended scraper forever.
            response = requests.get(url, params=params, timeout=60)
            response.raise_for_status()
            data = response.json()

            actual_size = data.get("info", {}).get("actualSize", 0)
            logging.info(f"Page {page_num}: {actual_size} decisions found")

            final_data.extend(data.get("decisions", []))

            # An empty page means we have walked past the last result.
            if actual_size == 0:
                break

            page_num += 1
        except Exception as e:
            logging.error(f"Error fetching page {page_num}: {e}")
            break

    logging.info(f"Total decisions fetched: {len(final_data)}")
    return final_data


def create_excel_report(decisions, output_filename):
    """Create a formatted Excel report from raw Diavgeia decision dicts.

    Side effect: appends each raw decision as one JSON line to
    'decision.json' (audit trail of exactly what was processed).

    Args:
        decisions: List of decision dicts from the Diavgeia API.
        output_filename: Path of the .xlsx file to write.

    Returns:
        list[dict]: The flattened rows written to the 'Decisions' sheet.
    """
    report_data = []

    # Open the audit file once, not once per decision as before (avoids
    # n open/close syscall pairs; the appended content is identical).
    with open('decision.json', 'a', encoding='utf-8') as f:
        for decision in decisions:
            json.dump(decision, f, ensure_ascii=False)
            f.write('\n')

            extra_fields = decision.get('extraFieldValues', {})
            decision_type = decision.get('decisionTypeId', '')

            # 'Β…'-type decisions nest CPV codes under each sponsor entry;
            # other types expose a flat 'cpv' list directly.
            if decision_type.startswith('Β'):
                cpv_set = set()
                for sponsor in extra_fields.get('sponsor', []):
                    cpv_list = sponsor.get('cpv', [])
                    if isinstance(cpv_list, list):
                        cpv_set.update(c for c in cpv_list if c)
                    elif cpv_list:
                        cpv_set.add(cpv_list)
                cpv_codes = sorted(cpv_set)
            else:
                cpv_codes = [c for c in extra_fields.get('cpv', []) if c]

            # issueDate is epoch milliseconds; blank cell when absent/zero.
            issue_ts = decision.get('issueDate')
            report_data.append({
                'ADA': decision.get('ada', ''),
                'Protocol Number': decision.get('protocolNumber', ''),
                'Subject': decision.get('subject', ''),
                'Issue Date': datetime.fromtimestamp(issue_ts / 1000).strftime('%Y-%m-%d') if issue_ts else '',
                'Decision Type': decision_type,
                'CPV Codes': ', '.join(cpv_codes) if cpv_codes else '',
                'Organization ID': decision.get('organizationId', ''),
                'URL': f"https://diavgeia.gov.gr/decision/view/{decision.get('ada', '')}",
                'Document URL': decision.get('documentUrl', '')
            })

    # Create DataFrame
    df = pd.DataFrame(report_data)

    # Write to Excel with formatting
    with pd.ExcelWriter(output_filename, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='Decisions', index=False)

        worksheet = writer.sheets['Decisions']

        # Format header row: white bold text on a blue fill.
        header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
        header_font = Font(bold=True, color='FFFFFF')

        for cell in worksheet[1]:
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal='center', vertical='center')

        # Auto-adjust column widths to the longest value, capped at 80 chars.
        for column in worksheet.columns:
            max_length = 0
            column_letter = column[0].column_letter
            for cell in column:
                # Narrowed from a bare 'except:' so Ctrl-C still interrupts;
                # only unexpected str()/len() failures are skipped.
                try:
                    max_length = max(max_length, len(str(cell.value)))
                except Exception:
                    pass
            worksheet.column_dimensions[column_letter].width = min(max_length + 2, 80)
    return report_data


def generate_web_dashboard_json(excel_filename, json_output_path):
    """Generate the web-dashboard JSON file from the Excel report.

    Args:
        excel_filename: Path of the .xlsx report to read back.
        json_output_path: Destination path for the JSON file.

    Raises:
        Re-raises any read/serialization error after logging it.
    """
    try:
        # Read the Excel file
        df = pd.read_excel(excel_filename)

        # Replace NaN values with None (which becomes null in JSON)
        df = df.where(pd.notnull(df), None)

        # Convert to records (list of dictionaries)
        records = df.to_dict('records')

        # Timestamp in Greek local time so the dashboard shows wall-clock time.
        greek_tz = ZoneInfo('Europe/Athens')
        now = datetime.now(greek_tz)
        formatted_date = now.strftime("%Y-%m-%d %H:%M")

        # Create the JSON structure
        json_data = {
            "last_updated": formatted_date,
            "total_decisions": len(records),
            "data": records
        }

        # Ensure the directory exists. os.makedirs('') raises
        # FileNotFoundError, so skip the call when the output path has no
        # directory component (bare filename in the working directory).
        output_dir = os.path.dirname(json_output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save to JSON file
        with open(json_output_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False, indent=2)

        logging.info(f"Web dashboard JSON created")

    except Exception as e:
        logging.error(f"Error generating web dashboard JSON: {e}")
        raise


def main():
    """Run the full pipeline: load CPVs, scrape, write Excel + dashboard JSON.

    Any uncaught error is logged with a traceback and reported by email.
    """
    logging.info("Starting Diavgeia scraper")
    try:
        # CPV codes drive the API query filter.
        cpv_codes = load_cpv_codes('CPV.xlsx')

        # Query window: the 180 days (~6 months) up to and including today.
        today = date.today()
        to_date = today.isoformat()
        from_date = (today - timedelta(days=180)).isoformat()

        logging.info(f"Date range: {from_date} to {to_date} (6 months)")

        decisions = fetch_decisions(cpv_codes, from_date, to_date)
        if not decisions:
            logging.info("No decisions found\n")
            return

        # Excel report for download, then a JSON feed derived from it for
        # the web dashboard.
        output_filename = "decisions_last_6.xlsx"
        create_excel_report(decisions, output_filename)
        logging.info(f"Created Excel file")

        json_output_path = "web_page/data/decisions_last_6.json"
        generate_web_dashboard_json(output_filename, json_output_path)

        logging.info(f"Scraper completed successfully. Files: {output_filename}, {json_output_path}\n")

    except Exception as e:
        logging.error(f"Fatal error in main execution: {e}", exc_info=True)
        send_email_error("Diavgeia Project", str(e))


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
