/**
 * Loads the resource at `url` and returns a descriptor of its displayable content.
 *
 * URLs ending in ".pdf" (any letter case) are not downloaded: a PDF descriptor
 * is returned immediately. Any other URL is fetched, parsed as HTML and reduced
 * to its title, main content, highlighted table and inline styles.
 *
 * Relies on the `fetch` and `DOMParser` globals being available at call time.
 *
 * @param {string} url - Address of the document to process.
 * @returns {Promise<
 *   {type: 'pdf', url: string, title: string, isPDF: true} |
 *   {type: 'html', title: string, mainContent: string, tableContent: string,
 *    styles: string, isPDF: false} |
 *   null
 * >} The content descriptor, or `null` when anything fails (the error is logged).
 */
export async function processHTMLContent(url) {
  try {
    // PDFs are detected purely from the URL suffix, case-insensitively.
    if (url.toLowerCase().endsWith('.pdf')) {
      const fileName = url.split('/').pop();
      return {
        type: 'pdf',
        url,
        // Strip only the trailing extension, with the same case-insensitivity
        // as the detection above, so "Report.PDF" yields "Report".
        title: fileName.replace(/\.pdf$/i, ''),
        isPDF: true,
      };
    }

    const response = await fetch(url);
    // fetch() resolves even for 4xx/5xx responses; without this check an error
    // page would be parsed and returned as if it were the requested document.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} al solicitar ${url}`);
    }
    const html = await response.text();

    const parser = new DOMParser();
    const doc = parser.parseFromString(html, 'text/html');

    // Main content, chosen by the selector priority in extractMainContent.
    const mainContent = extractMainContent(doc);

    // The highlighted table is optional and identified by its border colour.
    const mainTable = doc.querySelector('table[bordercolor="#cc99ff"]');

    return {
      type: 'html',
      title: doc.querySelector('title')?.textContent || '',
      mainContent,
      tableContent: mainTable?.innerHTML || '',
      styles: extractStyles(doc),
      isPDF: false,
    };
  } catch (error) {
    // Best-effort contract: callers receive null instead of an exception.
    console.error('Error procesando el contenido:', error);
    return null;
  }
}

/**
 * Returns the inner HTML of the document's main content area, with
 * <script> blocks and HTML comments removed and surrounding whitespace trimmed.
 *
 * Candidate containers are probed in priority order; the first one that has
 * non-empty inner HTML wins. If none qualifies, the whole <body> is used.
 *
 * @param {Document} doc - Parsed HTML document (anything exposing querySelector).
 * @returns {string} Cleaned markup, or '' when there is no <body> either.
 */
function extractMainContent(doc) {
  // Drops script elements and HTML comments from a markup string, then trims it.
  const sanitize = (markup) =>
    markup
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
      .replace(/<!--[\s\S]*?-->/g, '')
      .trim();

  // Ordered from most to least preferred container.
  const prioritizedSelectors = [
    'blockquote',
    '.Section1',
    'td[bgcolor="#FFFFFF"]',
    'td[valign="top"] > div[align="justify"]',
    'table[width="100%"] td[valign="top"]',
    'div.Section1'
  ];

  for (let i = 0; i < prioritizedSelectors.length; i += 1) {
    const candidate = doc.querySelector(prioritizedSelectors[i]);
    // A missing element or one with empty markup does not count as a match.
    if (candidate && candidate.innerHTML) {
      return sanitize(candidate.innerHTML);
    }
  }

  // Nothing specific matched: fall back to the full body, if there is one.
  const body = doc.querySelector('body');
  return body ? sanitize(body.innerHTML) : '';
}

/**
 * Collects the CSS text of every <style> element in the document.
 *
 * HTML comment markers ("<!--" and "-->") found inside a style sheet are
 * removed, each sheet is trimmed, and sheets left empty are discarded.
 *
 * @param {Document} doc - Parsed HTML document (anything exposing querySelectorAll).
 * @returns {string} The remaining style sheets joined with newlines ('' if none).
 */
function extractStyles(doc) {
  return Array.from(doc.querySelectorAll('style'), (node) =>
    node.textContent
      .replace(/<!--/g, '')
      .replace(/-->/g, '')
      .trim()
  )
    .filter((css) => css.length > 0)
    .join('\n');
}

/**
 * Parses the HTML of a publications listing into a list of publication records.
 *
 * Publication entries are the rows of `table[bordercolor="#cc99ff"]` whose first
 * cell contains a bullet image (src containing "BD10263_" or "lista"). For each
 * such row the last cell supplies the date, the main link (title and URL), the
 * remaining text (description), optional `<ul><li>` sub-items and an optional
 * non-bullet image. Rows without a last cell or without a link are skipped.
 *
 * Relies on the `DOMParser` global being available at call time.
 *
 * @param {string} htmlContent - Raw HTML of the listing page.
 * @returns {{data: Array<{id: string, attributes: {
 *   Title: string,
 *   Date: (string|null),
 *   URL: (string|null),
 *   Description: (string|null),
 *   Image?: {data: Array<{attributes: {url: string, alternativeText: string,
 *            width: (string|null), height: (string|null)}}>},
 *   SubItems?: Array<{text: string, url: (string|null)}>
 * }}>}} Publications in document order. Ids are `pub-N`, where N is the 1-based
 *   position of the row among bullet rows (skipped rows leave gaps).
 */
export function parsePublicationsHTML(htmlContent) {
  const parser = new DOMParser();
  const doc = parser.parseFromString(htmlContent, 'text/html');
  const publications = [];

  // Only rows whose first cell holds a bullet image are publication entries.
  const publicationRows = Array.from(
    doc.querySelectorAll('table[bordercolor="#cc99ff"] tr')
  ).filter(
    (row) =>
      row.querySelector(
        'td:first-child img[src*="BD10263_"], td:first-child img[src*="lista"]'
      ) != null
  );

  publicationRows.forEach((row, index) => {
    const contentCell = row.querySelector('td:last-child');
    if (!contentCell) return;

    const mainLink = contentCell.querySelector('a');
    if (!mainLink) return;

    // Accepts d-m-yyyy, m-yyyy or a bare yyyy, separated by "-", "/" or "_".
    const dateMatch = contentCell.textContent.match(
      /(\d{1,2}[-/_]\d{1,2}[-/_]\d{4}|\d{1,2}[-/_]\d{4}|\d{4})/
    );
    // Keep the date exactly as written: it is what has to be removed from the
    // title/description. The normalised form is only for the output field.
    const rawDate = dateMatch ? dateMatch[1] : null;
    const date = rawDate ? rawDate.replace(/_/g, '-') : null;

    const fullText = contentCell.textContent.trim();

    // Title is the link text without the date and any leading punctuation.
    let title = mainLink.textContent.trim();
    if (rawDate && title.includes(rawDate)) {
      title = title.replace(rawDate, '').replace(/^[-:.,\s]+/, '').trim();
    }

    // Description is whatever remains of the cell text once the date and the
    // title have been removed.
    const description = fullText
      .replace(rawDate ?? '', '')
      .replace(title, '')
      .replace(/^[-:.,\s]+/, '')
      .trim();

    const subItems = Array.from(contentCell.querySelectorAll('ul li')).map((li) => {
      const link = li.querySelector('a');
      return {
        text: li.textContent.trim(),
        url: link ? link.getAttribute('href') : null,
      };
    });

    const publication = {
      id: `pub-${index + 1}`,
      attributes: {
        Title: title,
        Date: date,
        URL: mainLink.getAttribute('href'),
        Description: description || null,
      },
    };

    // First image in the cell that is not one of the bullet icons.
    const image = contentCell.querySelector(
      'img:not([src*="BD10263_"]):not([src*="lista"])'
    );
    // The selector also matches <img> without a src attribute; those carry no
    // usable URL and are ignored.
    const src = image?.getAttribute('src');
    if (src) {
      // Only relative paths are made root-relative. URLs that already start
      // with "/" or with a scheme ("https:", "data:", ...) are left untouched.
      const isAbsolute = /^(?:[a-z][a-z\d+.-]*:|\/)/i.test(src);
      publication.attributes.Image = {
        data: [
          {
            attributes: {
              url: isAbsolute ? src : `/${src}`,
              alternativeText: title,
              width: image.getAttribute('width'),
              height: image.getAttribute('height'),
            },
          },
        ],
      };
    }

    if (subItems.length > 0) {
      publication.attributes.SubItems = subItems;
    }

    publications.push(publication);
  });

  return { data: publications };
}
  
  // Example usage:
  /*
  const result = parsePublicationsHTML(htmlContent);
  console.log(JSON.stringify(result, null, 2));
  */