#!/usr/bin/env python3

# Libervia: an XMPP client
# Copyright (C) 2009-2025 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from lxml import etree

from libervia.backend.plugins.plugin_comp_email_gateway.cleaning import (
    TextToHtmlConverter,
    MailingListHtmlCleaner,
    convert_to_html_and_detect_noise,
)


class TestTextToHtmlConverter:
    """Exercise ``TextToHtmlConverter.text_to_html`` on plain-text inputs."""

    @staticmethod
    def _render(text):
        """Convert ``text`` to HTML and return it parsed under a ``<div>`` root.

        The ``<div>`` wrapper gives lxml a single root element, so the output
        parses even when it consists of several sibling elements.
        """
        html = TextToHtmlConverter().text_to_html(text)
        return etree.fromstring(f"<div>{html}</div>")

    def test_simple_text_conversion(self):
        """Two consecutive lines end up in one paragraph joined by a line break."""
        tree = self._render("Hello world\nThis is a test")

        paragraphs = tree.xpath("//p")
        assert len(paragraphs) == 1

        # Both lines and the separating <br> must be in the serialised paragraph.
        markup = etree.tostring(paragraphs[0], encoding="unicode")
        for fragment in ("Hello world", "This is a test", "<br"):
            assert fragment in markup

    def test_paragraphs_with_empty_lines(self):
        """Text blocks separated by an empty line become separate <p> elements."""
        tree = self._render("First paragraph\n\nSecond paragraph")

        paragraphs = tree.xpath("//p")
        assert len(paragraphs) == 2

        expected_contents = ("First paragraph", "Second paragraph")
        for paragraph, content in zip(paragraphs, expected_contents):
            assert content in etree.tostring(paragraph, encoding="unicode")

    def test_simple_blockquote(self):
        """Lines prefixed with ``>`` produce a blockquote holding a paragraph."""
        tree = self._render("> This is a quote\n> Another quote line")

        quotes = tree.xpath("//blockquote")
        assert quotes

        # The quoted text must be rendered as paragraph(s) inside the quote.
        assert quotes[0].xpath(".//p")

    def test_nested_blockquotes(self):
        """Multi-level quoting produces at least one blockquote.

        Only the presence of a blockquote is checked here, not the nesting depth.
        """
        tree = self._render("> First level\n>> Second level\n> Back to first level")

        assert tree.xpath("//blockquote")

    def test_ordered_list_conversion(self):
        """Numbered lines are converted to a single <ol> with one <li> per line."""
        tree = self._render(
            "Here are steps:\n1. First step\n2. Second step\n3. Third step"
        )

        assert len(tree.xpath("//ol")) == 1

        items = tree.xpath("//li")
        assert len(items) == 3
        for item, label in zip(items, ("First step", "Second step", "Third step")):
            assert label in item.text

    def test_unordered_list_conversion(self):
        """Dash-prefixed lines are converted to a single <ul> with one <li> per line."""
        tree = self._render(
            "Here are points:\n- First point\n- Second point\n- Third point"
        )

        assert len(tree.xpath("//ul")) == 1

        items = tree.xpath("//li")
        assert len(items) == 3
        for item, label in zip(items, ("First point", "Second point", "Third point")):
            assert label in item.text


class TestMailingListHtmlCleaner:
    """Test the MailingListHtmlCleaner class.

    Each test feeds plain text to ``clean_message`` and inspects the returned
    markup after wrapping it in a ``<div>`` so lxml gets a single root element.
    """

    def test_clean_simple_text(self):
        """Cleaning of simple text yields at least one paragraph."""
        cleaner = MailingListHtmlCleaner()
        text = "Hello world\nThis is a test"
        result = cleaner.clean_message(text)

        root = etree.fromstring(f"<div>{result}</div>")

        # Should contain a paragraph
        paragraphs = root.xpath("//p")
        assert len(paragraphs) > 0

    def test_clean_with_quotes(self):
        """Cleaning of text with a quoted line yields at least one blockquote."""
        cleaner = MailingListHtmlCleaner()
        text = "Hello\n> This is quoted\ntext"
        result = cleaner.clean_message(text)

        root = etree.fromstring(f"<div>{result}</div>")

        # Should contain blockquote
        blockquotes = root.xpath("//blockquote")
        assert len(blockquotes) > 0

    def test_clean_mailing_list_message(self):
        """Cleaning of a realistic XMPP mailing list message.

        The message combines a reply header, a quoted message, a list, common
        sign-offs, a personal signature and a mailing-list footer. The test
        checks that noise is flagged, that exactly one element carries the
        ``noise-signature`` class, and that no noise element contains "Thanks".
        """
        cleaner = MailingListHtmlCleaner()
        text = """On Wed, Mar 15, 2024 at 2:34 PM Louise <louise@example.org> wrote:

> Hi everyone,
>
> I wanted to share some thoughts on XMPP server optimization we've been discussing internally.
>
> Key points:
> - Use message carbons for better message synchronization
> - Implement proper roster versioning to reduce bandwidth
> - Consider using XEP-0313 (Message Archive Management) for history
> - Don't forget to enable compression on S2S connections
>
> What are your experiences with these approaches? I'd love to hear what strategies have worked well for your deployments.
>
> Thanks,
> Louise

---

Best practices for XMPP server optimization

I've been working on some performance improvements lately and wanted to share a few thoughts on XMPP server optimization.

I think we should be focusing on:
1. Proper XEP implementation based on use cases
2. Monitoring connection statistics
3. Regular maintenance of database indexes

Let me know if anyone has faced similar challenges or have different approaches they'd like to discuss.

Thanks,
Louise

--
Louise
Senior XMPP Engineer
example.org
Email: louise@example.org
Phone: (555) 123-4567

This message was sent to dev@example.org
To unsubscribe, visit: https://example.org/unsubscribe/dev
For archives and more information: https://example.org/archives/dev"""
        result = cleaner.clean_message(text)

        root = etree.fromstring(f"<div>{result}</div>")

        # Should contain proper HTML structure
        paragraphs = root.xpath("//p")
        assert len(paragraphs) > 0

        # Should contain blockquotes for the quoted message
        blockquotes = root.xpath("//blockquote")
        assert len(blockquotes) > 0

        # Should contain noise elements (reply context and mailing list signature).
        # Only elements whose class actually mentions "noise" are selected, so the
        # checks below apply to noise markers and not to any element with a class.
        noise_elements = root.xpath("//*[contains(@class, 'noise')]")
        assert len(noise_elements) > 0

        # Check that common sign-offs like "Thanks," are not marked as noise
        # They should not have noise class.
        thanks_elements = [
            elem for elem in noise_elements if "Thanks" in "".join(elem.itertext())
        ]
        signature_noise_elements = [
            elem for elem in noise_elements if "noise-signature" in elem.get("class", "")
        ]
        assert (
            len(thanks_elements) == 0
        ), 'Common sign-offs like "Thanks," should not be marked as noise'
        assert (
            len(signature_noise_elements) == 1
        ), "Mailing-list signature should be marked as noise."


def test_clean_mailing_list_content_function():
    """Check ``convert_to_html_and_detect_noise`` on a short list message.

    The result must contain a paragraph and a blockquote, and the mailing-list
    footer must be flagged with a ``noise-signature`` class.
    """
    message = """Hello
> Quoted text

--
This message was sent to dev@example.org
To unsubscribe, visit: https://example.org/unsubscribe/dev
For archives and more information: https://example.org/archives/dev"""
    html = convert_to_html_and_detect_noise(message)

    # Wrap in a <div> so the fragment has a single root element for lxml.
    tree = etree.fromstring(f"<div>{html}</div>")

    # Basic structure: regular text and quoted text are both rendered.
    paragraph_nodes = tree.xpath("//p")
    assert paragraph_nodes

    quote_nodes = tree.xpath("//blockquote")
    assert quote_nodes

    # The mailing list signature should be detected as noise.
    class_values = [node.get("class") for node in tree.xpath("//*[@class]")]
    assert any("noise-signature" in value for value in class_values)


def test_common_signoffs_not_marked_as_noise():
    """A plain "Best regards," closing must not get a ``noise-signature`` class."""
    message = """Hello everyone,

I wanted to share some thoughts with you.

Best regards,
John Doe"""
    html = convert_to_html_and_detect_noise(message)

    # Wrap in a <div> so the fragment has a single root element for lxml.
    tree = etree.fromstring(f"<div>{html}</div>")

    # The message body must still be rendered as paragraph(s).
    assert tree.xpath("//p")

    # No element may carry a class mentioning "noise-signature": common
    # sign-offs like "Best regards," should not be marked as noise.
    flagged_as_signature = tree.xpath("//*[contains(@class, 'noise-signature')]")
    assert not flagged_as_signature
