# Page

"""
Tests for varbook.write.pdf module.
"""

import pytest
from pathlib import Path
import tempfile
import shutil
from varbook.write.pdf import (
    extract_links_from_markdown,
    collect_files_recursively,
    markdown_to_html,
    extract_headers,
    generate_toc_html,
    get_default_css
)


@pytest.fixture
def temp_dir():
    """Yield a fresh scratch directory and delete it afterwards.

    The directory is created with ``tempfile.mkdtemp`` and handed to the
    requesting test as a ``Path``. Once control returns to the fixture,
    the whole directory tree is removed.
    """
    scratch = tempfile.mkdtemp()
    scratch_path = Path(scratch)
    yield scratch_path
    # Teardown: remove the directory and everything the test wrote into it.
    shutil.rmtree(scratch_path)


def test_extract_links_from_markdown(temp_dir):
    """Check that only the two local file links are extracted.

    The sample document mixes two relative file links with an external URL
    and an in-page anchor; exactly the two file links are expected back.
    """
    # Create the files that the two relative links point at.
    for filename, body in (
        ('other.md', '# Other Doc'),
        ('page.html', '<h1>Page</h1>'),
    ):
        (temp_dir / filename).write_text(body)

    md_content = """
    # Test Document

    This is a link to [another doc](./other.md).
    This is a link to [html doc](./page.html).
    This is an [external link](https://example.com) which should be ignored.
    This is an [anchor](#section) which should be ignored.
    """

    links = extract_links_from_markdown(md_content, temp_dir)

    assert len(links) == 2
    # Each expected file name must show up in at least one returned link.
    for expected_name in ('other.md', 'page.html'):
        assert any(expected_name in str(link) for link in links)


def test_extract_links_nonexistent_files(temp_dir):
    """Check that a link to a file missing on disk is left out."""
    # Only one of the two link targets is actually created.
    existing_target = temp_dir / 'exists.md'
    existing_target.write_text('# Exists')

    md_content = """
    Link to [nonexistent](./missing.md).
    Link to [existing](./exists.md).
    """

    links = extract_links_from_markdown(md_content, temp_dir)

    assert len(links) == 1
    sole_link = links[0]
    assert 'exists.md' in str(sole_link)


def test_collect_files_recursively(temp_dir):
    """Check that following links from one entry file gathers the whole chain."""
    # doc1 -> doc2 -> doc3; the last document has no outgoing links.
    chain = {
        'doc1.md': '[Link to doc2](./doc2.md)',
        'doc2.md': '[Link to doc3](./doc3.md)',
        'doc3.md': '# Final Doc',
    }
    for filename, body in chain.items():
        (temp_dir / filename).write_text(body)

    entry_point = temp_dir / 'doc1.md'
    files = collect_files_recursively([entry_point])

    assert len(files) == 3
    # Every document in the chain must appear among the collected files.
    for filename in chain:
        assert any(filename in str(f) for f in files)


def test_collect_files_with_cycle(temp_dir):
    """Check that a circular chain of links yields each file exactly once."""
    # Each document links to the next and the last links back to the first:
    # doc1 -> doc2 -> doc3 -> doc1.
    ring = [
        ('doc1.md', '[Link to doc2](./doc2.md)'),
        ('doc2.md', '[Link to doc3](./doc3.md)'),
        ('doc3.md', '[Link back to doc1](./doc1.md)'),
    ]
    for filename, body in ring:
        (temp_dir / filename).write_text(body)

    start = temp_dir / 'doc1.md'
    files = collect_files_recursively([start])

    # Three results, all distinct.
    assert len(files) == 3
    distinct_files = set(files)
    assert len(distinct_files) == 3


def test_markdown_to_html():
    """Test markdown to HTML conversion.

    The sample covers headings, bold and italic emphasis, a fenced code
    block and a table, and the test checks that each shows up as the
    corresponding HTML element in the output.
    """
    from textwrap import dedent

    # The literal is indented to line up with the surrounding code, so it is
    # dedented before conversion: Markdown treats lines indented by four or
    # more spaces as an indented code block, which would otherwise turn the
    # whole sample (headings included) into preformatted text.
    md_content = dedent("""
    # Heading 1

    This is a paragraph with **bold** and *italic* text.

    ## Heading 2

    ```python
    def hello():
        print("world")
    ```

    | Column 1 | Column 2 |
    |----------|----------|
    | Data 1   | Data 2   |
    """)

    html = markdown_to_html(md_content)

    assert '<h1>Heading 1</h1>' in html
    assert '<h2>Heading 2</h2>' in html
    assert '<strong>bold</strong>' in html
    assert '<em>italic</em>' in html
    # Only the opening of the tag is checked; attributes on <code> may vary.
    assert '<code' in html
    assert '<table>' in html


def test_extract_headers():
    """Check the level, text and id reported for headers found in HTML."""
    html_content = """
    <h1 id="title">Main Title</h1>
    <h2 id="section1">Section 1</h2>
    <h3>Subsection 1.1</h3>
    <h2 id="section2">Section 2</h2>
    """

    headers = extract_headers(html_content)

    assert len(headers) == 4

    # The first header is checked in full, including its id attribute.
    title = headers[0]
    assert title['level'] == 1
    assert title['text'] == 'Main Title'
    assert title['id'] == 'title'

    # For the next two only level and text are verified.
    for position, level, text in (
        (1, 2, 'Section 1'),
        (2, 3, 'Subsection 1.1'),
    ):
        header = headers[position]
        assert header['level'] == level
        assert header['text'] == text


def test_generate_toc_html():
    """Check the ToC markup has its wrapper, its title and one link per header."""
    entries = [
        (1, 'Title', 'title'),
        (2, 'Section 1', 'section1'),
        (3, 'Subsection 1.1', 'subsection11'),
        (2, 'Section 2', 'section2'),
    ]
    headers = [
        {'level': level, 'text': text, 'id': anchor}
        for level, text, anchor in entries
    ]

    toc_html = generate_toc_html(headers)

    assert '<div class="toc">' in toc_html
    assert '<h1>Table of Contents</h1>' in toc_html
    # Every header must be rendered as an anchor pointing at its id.
    for _level, text, anchor in entries:
        assert f'<a href="#{anchor}">{text}</a>' in toc_html


def test_generate_toc_html_empty():
    """Check that an empty header list produces an empty ToC string."""
    no_headers = []
    assert generate_toc_html(no_headers) == ""


def test_get_default_css():
    """Check that the default CSS contains each expected fragment."""
    css = get_default_css()

    expected_fragments = ('@page', 'font-family', 'margin', '.toc', 'table')
    for fragment in expected_fragments:
        assert fragment in css


def test_generate_pdf_basic(temp_dir):
    """Check that a PDF built from three sections exists and is non-empty.

    One markdown file each is passed as beginning, variant and ending
    content, with the table of contents enabled.
    """
    from varbook.write.pdf import generate_pdf

    # One source file per section of the generated document.
    intro = temp_dir / 'intro.md'
    variant = temp_dir / 'variant.md'
    conclusion = temp_dir / 'conclusion.md'
    intro.write_text('# Introduction\n\nThis is the intro.')
    variant.write_text('# Variant 1\n\nThis is a variant.')
    conclusion.write_text('# Conclusion\n\nThe end.')

    output_pdf = temp_dir / 'output.pdf'

    generate_pdf(
        output_file=output_pdf,
        beginning_files=[intro],
        variant_files=[variant],
        ending_files=[conclusion],
        include_toc=True,
    )

    # The output must have been written and must contain some data.
    assert output_pdf.exists()
    pdf_size = output_pdf.stat().st_size
    assert pdf_size > 0


def test_generate_pdf_with_links(temp_dir):
    """Check that a PDF is produced when the input links to another file.

    Only the main file is passed in; it contains a relative link to a
    second markdown file. The table of contents is disabled.
    """
    from varbook.write.pdf import generate_pdf

    # main.md links to details.md via a relative link.
    main_doc = temp_dir / 'main.md'
    details_doc = temp_dir / 'details.md'
    main_doc.write_text('# Main\n\nSee [details](./details.md).')
    details_doc.write_text('# Details\n\nMore information here.')

    output_pdf = temp_dir / 'output.pdf'

    generate_pdf(
        output_file=output_pdf,
        beginning_files=[main_doc],
        include_toc=False,
    )

    # The output must have been written and must contain some data.
    assert output_pdf.exists()
    pdf_size = output_pdf.stat().st_size
    assert pdf_size > 0