# Mirror of https://github.com/boostorg/website-v2.git
# (synced 2026-01-19 16:52:16 +00:00; 796 lines, 21 KiB, Python)
from bs4 import BeautifulSoup
|
|
import pytest
|
|
from pytest_django.asserts import assertHTMLEqual
|
|
|
|
from core.htmlhelper import (
|
|
REMOVE_ALL,
|
|
REMOVE_CSS_CLASSES,
|
|
REMOVE_TAGS,
|
|
convert_h1_to_h2,
|
|
get_library_documentation_urls,
|
|
modernize_legacy_page,
|
|
remove_css,
|
|
remove_duplicate_tag,
|
|
remove_first_tag,
|
|
remove_ids,
|
|
remove_release_classes,
|
|
remove_tables,
|
|
remove_tags,
|
|
remove_unwanted,
|
|
style_links,
|
|
modernize_release_notes,
|
|
)
|
|
|
|
# Contents of the <head> element of the base template used by these tests.
# The tests below expect this markup to appear in the modernized page's <head>,
# between the "BEGIN/END Manually appending items" comments.
BASE_HEAD = """
<link rel="stylesheet" href="mystyle.css" />
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<style>
body {background-color: powderblue;}
h1 {color: red;}
p {color: blue;}
</style>
<meta name="description" content="Unit Test" />
<meta name="keywords" content="HTML, CSS, JavaScript" />
"""
# Placeholder comment embedded in BASE_BODY; _build_expected_body() substitutes
# it to compute the body the tests expect after legacy content is injected.
BASE_TOKEN = "<!-- Add your content here -->"
# Body of the base template; BASE_TOKEN sits inside the
# "boost-legacy-docs-body" container.
BASE_BODY = f"""
<div id="boost-legacy-docs-header">The Header, Log In/Log Out, Menu</div>
<h1>A very important heading</h1>
<p>My first paragraph.</p>
<div id="other-block-content-id">
<p>My second paragraph.</p>
</div>
<div id="boost-legacy-docs-body">
{BASE_TOKEN}
</div>
"""
# Full base document handed to modernize_legacy_page() as ``base_html``.
# NOTE(review): <head> is emitted before <html> here, which is not well-formed
# HTML; the expected outputs below were written against this shape, so confirm
# with the helper's behaviour before "fixing" it.
BASE_HTML = f"""<!DOCTYPE html>
<head>{BASE_HEAD}</head>
<html>
<body>{BASE_BODY}</body>
</html>
"""
# <head> contents of the simulated legacy page.
LEGACY_HEAD = """
<title>A Meaningful Page Title</title>
<link rel="stylesheet" href="old.css" />
"""
# <body> contents of the simulated legacy page.
LEGACY_BODY = """
<h1>My Legacy Heading</h1>
<p>My legacy paragraph.</p>
"""
|
|
|
|
|
|
def _build_tag(tag_name, tag_attrs, inner="Something"):
|
|
tag = f"<{tag_name}"
|
|
if tag_attrs:
|
|
tag += " " + " ".join(f'{k}="{v}"' for k, v in tag_attrs.items())
|
|
if tag_name in ["img", "link"]:
|
|
tag += "/>"
|
|
elif tag_name == "hr":
|
|
tag += ">"
|
|
else:
|
|
tag += f">{inner}</{tag_name}>"
|
|
|
|
return tag
|
|
|
|
|
|
def _build_expected_body(expected_body):
    """Return BASE_BODY with ``expected_body`` inserted right after BASE_TOKEN.

    The token itself is kept, so the result is the base body with the given
    markup following the placeholder comment.
    """
    token_plus_content = f"""
{BASE_TOKEN}
{expected_body}
"""
    return BASE_BODY.replace(BASE_TOKEN, token_plus_content)
|
|
|
|
|
|
def test_modernize_legacy_page_unchanged_empty():
    """Plain text without any HTML structure is expected back unchanged."""
    legacy_markup = """Something else"""
    parsed = BeautifulSoup(legacy_markup, "html.parser")
    modernized = modernize_legacy_page(parsed, base_html=BASE_HTML)
    assertHTMLEqual(modernized, legacy_markup)
|
|
|
|
|
|
def test_modernize_legacy_page_unchanged_simple():
    """An empty ``<html>`` document with an empty base template is unchanged."""
    legacy_markup = """<!DOCTYPE html>
<html>
</html>
"""
    parsed = BeautifulSoup(legacy_markup, "html.parser")
    modernized = modernize_legacy_page(parsed, base_html="")

    assertHTMLEqual(modernized, legacy_markup)
|
|
|
|
|
|
def test_modernize_legacy_page_unchanged_no_head():
    """A body-only legacy page with an empty base template is unchanged."""
    legacy_markup = f"""<!DOCTYPE html>
<html>
<body>
{LEGACY_BODY}
</body>
</html>
"""

    parsed = BeautifulSoup(legacy_markup, "html.parser")
    modernized = modernize_legacy_page(parsed, base_html="")

    assertHTMLEqual(modernized, legacy_markup)
|
|
|
|
|
|
def test_modernize_legacy_page_unchanged_no_body():
    """A head-only legacy page with an empty base template is unchanged."""
    legacy_markup = f"""<!DOCTYPE html>
<head>
{LEGACY_HEAD}
</head>
<html>
</html>
"""

    parsed = BeautifulSoup(legacy_markup, "html.parser")
    modernized = modernize_legacy_page(parsed, base_html="")

    assertHTMLEqual(modernized, legacy_markup)
|
|
|
|
|
|
def test_modernize_legacy_page_adds_head_if_missing():
    """A page without ``<head>`` gains one holding the base head items."""
    legacy_markup = """<!DOCTYPE html>
<html>
</html>
"""

    parsed = BeautifulSoup(legacy_markup, "html.parser")
    modernized = modernize_legacy_page(parsed, base_html=BASE_HTML)

    # The base head is expected wrapped in BEGIN/END marker comments.
    wanted = f"""<!DOCTYPE html>
<html>
<head>
<!-- BEGIN Manually appending items -->
{BASE_HEAD}
<!-- END Manually appending items -->
</head>
</html>
"""
    assertHTMLEqual(modernized, wanted)
|
|
|
|
|
|
def test_modernize_legacy_page_appends_head_if_existing():
    """Base head items are appended after the legacy page's own head content."""
    legacy_markup = f"""<!DOCTYPE html>
<html>
<head>
{LEGACY_HEAD}
</head>
</html>
"""

    parsed = BeautifulSoup(legacy_markup, "html.parser")
    modernized = modernize_legacy_page(parsed, base_html=BASE_HTML)

    # Legacy head first, then the marker-wrapped base head.
    wanted = f"""<!DOCTYPE html>
<html>
<head>
{LEGACY_HEAD}
<!-- BEGIN Manually appending items -->
{BASE_HEAD}
<!-- END Manually appending items -->
</head>
</html>
"""
    assertHTMLEqual(modernized, wanted)
|
|
|
|
|
|
def test_modernize_legacy_page_mangles_body():
    """The legacy body is expected inside the base body, after BASE_TOKEN."""
    legacy_markup = f"""<!DOCTYPE html>
<html>
<body>
{LEGACY_BODY}
</body>
</html>
"""

    parsed = BeautifulSoup(legacy_markup, "html.parser")
    modernized = modernize_legacy_page(parsed, base_html=BASE_HTML)

    wanted_body = _build_expected_body(LEGACY_BODY)
    wanted = f"""<!DOCTYPE html>
<html>
<head>
<!-- BEGIN Manually appending items -->
{BASE_HEAD}
<!-- END Manually appending items -->
</head>
<body>
{wanted_body}
</body>
</html>
"""
    assertHTMLEqual(modernized, wanted)
|
|
|
|
|
|
@pytest.mark.parametrize("tag_name, tag_attrs", REMOVE_TAGS)
def test_modernize_legacy_page_remove_first_tag_found(tag_name, tag_attrs):
    """For each REMOVE_TAGS entry only the first of two copies is dropped.

    The expected body is LEGACY_BODY followed by a single remaining copy of
    the element.
    """
    unwanted = _build_tag(tag_name, tag_attrs)

    legacy_markup = f"""<!DOCTYPE html>
<html>
<body>
<h1>My Legacy Heading</h1>
{unwanted}
<p>My legacy paragraph.</p>
{unwanted}
</body>
</html>
"""

    cleaned = remove_unwanted(BeautifulSoup(legacy_markup, "html.parser"))
    modernized = modernize_legacy_page(
        cleaned, base_html=BASE_HTML, skip_replace_boostlook=True
    )

    wanted_body = _build_expected_body(LEGACY_BODY + unwanted)
    wanted = f"""<!DOCTYPE html>
<html>
<head>
<!-- BEGIN Manually appending items -->
{BASE_HEAD}
<!-- END Manually appending items -->
</head>
<body>
{wanted_body}
</body>
</html>
"""
    assertHTMLEqual(modernized, wanted)
|
|
|
|
|
|
@pytest.mark.parametrize("tag_name, tag_attrs", REMOVE_ALL)
def test_modernize_legacy_page_remove_all_tags_found(tag_name, tag_attrs):
    """For each REMOVE_ALL entry every copy is dropped, leaving LEGACY_BODY."""
    unwanted = _build_tag(tag_name, tag_attrs)

    legacy_markup = f"""<!DOCTYPE html>
<html>
<body>
{unwanted}
<h1>My Legacy Heading</h1>
{unwanted}
<p>My legacy paragraph.</p>
{unwanted}
</body>
</html>
"""

    cleaned = remove_unwanted(BeautifulSoup(legacy_markup, "html.parser"))
    modernized = modernize_legacy_page(cleaned, base_html=BASE_HTML)

    wanted_body = _build_expected_body(LEGACY_BODY)
    wanted = f"""<!DOCTYPE html>
<html>
<head>
<!-- BEGIN Manually appending items -->
{BASE_HEAD}
<!-- END Manually appending items -->
</head>
<body>
{wanted_body}
</body>
</html>
"""
    assertHTMLEqual(modernized, wanted)
|
|
|
|
|
|
@pytest.mark.parametrize("tag_name, tag_attrs", REMOVE_CSS_CLASSES)
def test_modernize_legacy_page_remove_only_css_class(tag_name, tag_attrs):
    """Matching elements keep their place and content but lose ``class``.

    Elements that do not match (the ``<h1>`` and the "not-a-match" div) are
    expected to keep their classes.
    """
    # Work on a copy so the shared REMOVE_CSS_CLASSES entry is never mutated.
    attrs_with_class = dict(tag_attrs)
    attrs_with_class.setdefault("class", "something-to-remove")
    classed = _build_tag(tag_name, attrs_with_class)

    legacy_markup = f"""<!DOCTYPE html>
<html>
<body>
{classed}
<h1 class="some-class">My Legacy Heading</h1>
{classed}
<p>My legacy paragraph.</p>
{classed}
<div class="not-a-match"><p>Hello World</p></div>
{classed}
</body>
</html>
"""

    cleaned = remove_unwanted(BeautifulSoup(legacy_markup, "html.parser"))
    modernized = modernize_legacy_page(cleaned, base_html=BASE_HTML)

    # Same element, minus its class attribute, in all four positions.
    attrs_without_class = {
        key: value for key, value in attrs_with_class.items() if key != "class"
    }
    declassed = _build_tag(tag_name, attrs_without_class)
    wanted_body = _build_expected_body(
        f"""
{declassed}
<h1 class="some-class">My Legacy Heading</h1>
{declassed}
<p>My legacy paragraph.</p>
{declassed}
<div class="not-a-match"><p>Hello World</p></div>
{declassed}
"""
    )
    wanted = f"""<!DOCTYPE html>
<html>
<head>
<!-- BEGIN Manually appending items -->
{BASE_HEAD}
<!-- END Manually appending items -->
</head>
<body>
{wanted_body}
</body>
</html>
"""
    assertHTMLEqual(modernized, wanted)
|
|
|
|
|
|
def test_get_library_documentation_urls():
    """Links listed after the "Alphabetically" anchor come back as (name, url)."""
    markup = """
<h2><a name="Alphabetically"></a></h2>
<ul>
<li><a href="/docs/path1">Library1</a></li>
<li><a href="/docs/path2">Library2</a></li>
</ul>
"""
    found = get_library_documentation_urls(markup)
    assert found == [("Library1", "/docs/path1"), ("Library2", "/docs/path2")]
|
|
|
|
|
|
def test_get_library_documentation_urls_no_library_section():
    """With no anchor of the default section name the result is empty."""
    markup = """
<h2><a name="NotTheRightSection"></a></h2>
<ul>
<li><a href="/docs/path1">Library1</a></li>
<li><a href="/docs/path2">Library2</a></li>
</ul>
"""
    found = get_library_documentation_urls(markup)
    assert found == []
|
|
|
|
|
|
def test_get_library_documentation_urls_no_libraries():
    """A section whose list holds no links yields an empty result."""
    markup = """
<h2><a name="Alphabetically"></a></h2>
<ul>
<!-- No libraries -->
</ul>
"""
    found = get_library_documentation_urls(markup)
    assert found == []
|
|
|
|
|
|
def test_get_library_documentation_urls_with_name_and_parent():
    """Custom ``name`` / ``parent`` arguments select a differently marked section."""
    markup = """
<div><a name="CustomSection"></a></div>
<ul>
<li><a href="/docs/path1">CustomLibrary1</a></li>
<li><a href="/docs/path2">CustomLibrary2</a></li>
</ul>
"""
    found = get_library_documentation_urls(
        markup, name="CustomSection", parent="div"
    )
    assert found == [
        ("CustomLibrary1", "/docs/path1"),
        ("CustomLibrary2", "/docs/path2"),
    ]
|
|
|
|
|
|
def test_convert_h1_to_h2():
    """Both ``<h1>`` headings become ``<h2>`` and keep their text and order."""
    markup = """
<html>
<body>
<h1>Title 1</h1>
<h1>Title 2</h1>
<p>Some text here</p>
</body>
</html>
"""
    converted = convert_h1_to_h2(BeautifulSoup(markup, "html.parser"))

    # No h1 may survive the conversion.
    assert not converted.find_all("h1")

    # Exactly the two former h1 headings exist as h2, text intact.
    heading_texts = [heading.get_text() for heading in converted.find_all("h2")]
    assert heading_texts == ["Title 1", "Title 2"]
|
|
|
|
|
|
def test_convert_h1_to_h2_none():
    """A document without ``<h1>`` ends up with no ``<h2>`` either."""
    markup = """
<html>
<body>
<p>Some text here</p>
</body>
</html>
"""
    converted = convert_h1_to_h2(BeautifulSoup(markup, "html.parser"))
    assert len(converted.find_all("h2")) == 0
|
|
|
|
|
|
def test_remove_css():
    """remove_css strips ``class`` from targeted elements and leaves others alone.

    Every element that still matches a REMOVE_CSS_CLASSES entry after the call
    must be free of a ``class`` attribute, and the unrelated ``<p>`` must keep
    its class.
    """
    # Sample HTML content with specific classes for testing
    html_content = """
<html>
<body>
<div class="body-0">Content 0</div>
<div class="body-1">Content 1</div>
<div class="body-2">Content 2</div>
<p class="class3">Content 3</p>
</body>
</html>
"""

    soup = BeautifulSoup(html_content, "html.parser")
    soup = remove_css(soup, REMOVE_CSS_CLASSES)

    # Check *every* remaining match (not just the first) and do it with an
    # assertion; the previous version only printed a message when nothing was
    # found, which produced noise without verifying anything.
    for tag_name, tag_attrs in REMOVE_CSS_CLASSES:
        for found_tag in soup.find_all(tag_name, tag_attrs):
            assert "class" not in found_tag.attrs

    # Tags not listed in REMOVE_CSS_CLASSES are unaffected. Look the tag up
    # first so a missing <p> fails as an assertion, not an AttributeError.
    untouched = soup.find("p", {"class": "class3"})
    assert untouched is not None
    assert "class" in untouched.attrs
|
|
|
|
|
|
def test_remove_css_none():
    """Test that remove_css still works if none of the CSS classes are present"""
    # Sample HTML content without any class attributes
    html_content = """
<html>
<body>
<div>Content 0</div>
<div>Content 1</div>
<div>Content 2</div>
<p>Content 3</p>
</body>
</html>
"""

    soup = BeautifulSoup(html_content, "html.parser")
    soup = remove_css(soup, REMOVE_CSS_CLASSES)

    # Nothing matching a REMOVE_CSS_CLASSES entry may carry a class.
    # The filter is passed through ``attrs=`` (as test_remove_css does
    # positionally) rather than ``**tag_attrs``: keyword expansion would clash
    # with find_all's own parameters for keys such as "name" or "string".
    # find_all never yields None, so no None check is needed.
    for tag_name, tag_attrs in REMOVE_CSS_CLASSES:
        for found_tag in soup.find_all(tag_name, attrs=tag_attrs):
            assert "class" not in found_tag.attrs

    # The unrelated paragraph must still be present with its content. The old
    # if/else here reduced to "p exists" plus a tautological assertion.
    other_tag = soup.find("p")
    assert other_tag is not None
    assert other_tag.get_text(strip=True) == "Content 3"
|
|
|
|
|
|
def test_remove_duplicate_tag():
    """Two identical consecutive ``<h2>`` headings are reduced to one."""
    markup = """
<html>
<body>
<h2>Header 1</h2>
<h2>Header 1</h2> <!-- Duplicate -->
<h2>Header 2</h2>
</body>
</html>
"""
    deduped = remove_duplicate_tag(BeautifulSoup(markup, "html.parser"), "h2")

    # One "Header 1" and the distinct "Header 2" remain, in document order.
    remaining = [h2.get_text(strip=True) for h2 in deduped.find_all("h2")]
    assert remaining == ["Header 1", "Header 2"]
|
|
|
|
|
|
def test_remove_duplicate_tag_none():
    """With no duplicated ``<h2>`` present, both headings are kept."""
    markup = """
<html>
<body>
<h2>Header 1</h2>
<h2>Header 2</h2>
</body>
</html>
"""
    deduped = remove_duplicate_tag(BeautifulSoup(markup, "html.parser"), "h2")

    remaining = [h2.get_text(strip=True) for h2 in deduped.find_all("h2")]
    assert remaining == ["Header 1", "Header 2"]
|
|
|
|
|
|
def test_remove_first_tag():
    """Only the first match of each listed tag is removed."""
    markup = """
<html>
<body>
<div id="header1">Header 1</div>
<div id="header2">Header 2</div>
<div id="header1">Another Header 1</div>
</body>
</html>
"""
    targets = [("div", {"id": "header1"}), ("div", {"id": "header2"})]
    trimmed = remove_first_tag(BeautifulSoup(markup, "html.parser"), targets)

    # header1 occurred twice: the later copy is now the first one found.
    surviving_header1 = trimmed.find("div", {"id": "header1"})
    assert surviving_header1.get_text(strip=True) == "Another Header 1"

    # header2 occurred once, so no element with that id is left.
    assert trimmed.find("div", {"id": "header2"}) is None
|
|
|
|
|
|
def test_remove_first_tag_none():
    """When each listed tag occurs once, none of them remain afterwards."""
    markup = """
<html>
<body>
<div id="header1">Header 1</div>
<div id="header2">Header 2</div>
</body>
</html>
"""
    targets = [("div", {"id": "header1"}), ("div", {"id": "header2"})]
    trimmed = remove_first_tag(BeautifulSoup(markup, "html.parser"), targets)

    assert trimmed.find("div", {"id": "header1"}) is None
    assert trimmed.find("div", {"id": "header2"}) is None
|
|
|
|
|
|
def test_remove_ids():
    """Listed ids disappear, nested content survives, other ids are untouched."""
    markup = """
<html>
<body>
<div id="remove1">Remove me</div>
<div id="unwrap1"><p>Unwrap me</p></div>
<div id="keep1">Keep me</div>
</body>
</html>
"""
    processed = remove_ids(
        BeautifulSoup(markup, "html.parser"), ["remove1", "unwrap1"]
    )

    # Neither targeted id can be found any more.
    assert processed.find(id="remove1") is None
    assert processed.find(id="unwrap1") is None

    # The paragraph that was nested in the "unwrap1" div is still present.
    assert processed.find("p").get_text(strip=True) == "Unwrap me"

    # An id that was not listed is left in place with its text.
    kept = processed.find(id="keep1")
    assert kept is not None
    assert processed.find(id="keep1").get_text(strip=True) == "Keep me"
|
|
|
|
|
|
def test_remove_release_classes():
    """Listed classes disappear, nested content survives, other classes stay."""
    markup = """
<html>
<body>
<div class="class-to-remove">Remove me</div>
<div class="class-to-unwrap"><p>Unwrap me</p></div>
<div class="class-to-keep">Keep me</div>
</body>
</html>
"""
    processed = remove_release_classes(
        BeautifulSoup(markup, "html.parser"),
        ["class-to-remove", "class-to-unwrap"],
    )

    # Neither targeted class can be found any more.
    assert processed.find(class_="class-to-remove") is None
    assert processed.find(class_="class-to-unwrap") is None

    # The paragraph that was nested in the "class-to-unwrap" div is still there.
    assert processed.find("p").get_text(strip=True) == "Unwrap me"

    # A class that was not listed is left in place with its text.
    kept = processed.find(class_="class-to-keep")
    assert kept is not None
    assert processed.find(class_="class-to-keep").get_text(strip=True) == "Keep me"
|
|
|
|
|
|
def test_remove_tables():
    """Tables with the given class are removed; all other tables are kept."""
    markup = """
<html>
<body>
<table class="table-to-remove">Content 1</table>
<table class="table-to-remove">Content 2</table>
<table class="other-class">Content 3</table>
<table>Content 4</table>
</body>
</html>
"""
    doomed_class = "table-to-remove"
    pruned = remove_tables(BeautifulSoup(markup, "html.parser"), doomed_class)

    # Every table carrying the targeted class is gone.
    assert pruned.find_all("table", {"class": doomed_class}) == []

    # A table with a different class is untouched.
    other_table = pruned.find("table", {"class": "other-class"})
    assert other_table.get_text(strip=True) == "Content 3"

    # So is the table that has no class at all.
    bare_table = pruned.find("table", class_=False)
    assert bare_table.get_text(strip=True) == "Content 4"
|
|
|
|
|
|
def test_remove_tags():
    """Elements matching a (tag, attrs) pair are removed; others remain."""
    markup = """
<html>
<body>
<div class="class-to-remove">Content 1</div>
<p id="id-to-remove">Content 2</p>
<span custom-attr="value-to-remove">Content 3</span>
<div>Content 4</div>
</body>
</html>
"""
    targets = [
        ("div", {"class": "class-to-remove"}),
        ("p", {"id": "id-to-remove"}),
        ("span", {"custom-attr": "value-to-remove"}),
    ]
    pruned = remove_tags(BeautifulSoup(markup, "html.parser"), targets)

    # None of the targeted elements can be found any more.
    assert pruned.find("div", {"class": "class-to-remove"}) is None
    assert pruned.find("p", {"id": "id-to-remove"}) is None
    assert pruned.find("span", {"custom-attr": "value-to-remove"}) is None

    # The attribute-less div is now the first div in the document.
    first_div = pruned.find("div")
    assert first_div.get_text(strip=True) == "Content 4"
|
|
|
|
|
|
def test_style_links():
    """Every ``<a>`` in the document receives the requested CSS class."""
    markup = """
<html>
<body>
<a href="link1.html">Link 1</a>
<a href="link2.html">Link 2</a>
<a href="link3.html">Link 3</a>
</body>
</html>
"""
    new_class_name = "styled-link"
    styled = style_links(BeautifulSoup(markup, "html.parser"), new_class_name)

    for link in styled.find_all("a"):
        assert new_class_name in link.get("class", []), "Class not added to link"
|
|
|
|
|
|
def test_style_links_no_links():
    """style_links on a document without links must not create any.

    The previous version only asserted inside a loop over ``<a>`` tags, which
    never ran for this input, so the test could not fail. It now checks that
    no link appears and that the existing paragraph is still there.
    """
    # Sample HTML content without any links
    html_content = """
<html>
<body>
<p>Sample</p>
</body>
</html>
"""

    soup = BeautifulSoup(html_content, "html.parser")

    # Class name that would be added to links, if there were any
    new_class_name = "styled-link"

    soup = style_links(soup, new_class_name)

    # No links going in, no links coming out.
    assert soup.find_all("a") == []

    # The non-link content is still present.
    paragraph = soup.find("p")
    assert paragraph is not None
    assert paragraph.get_text(strip=True) == "Sample"
|
|
|
|
|
|
def test_modernize_release_notes():
    """modernize_release_notes output matches the stored filtered fixture.

    The sample is read as bytes and passed to the helper; the expected output
    is read as text and stripped of surrounding whitespace before comparing.
    """
    sample_path = "core/tests/content/boost_release_notes_sample.html"
    expected_path = "core/tests/content/boost_release_notes_filtered_output.html"

    # Context managers close both files deterministically; the bare
    # open(...).read() calls left them to the garbage collector.
    with open(sample_path, "rb") as sample_file:
        content = sample_file.read()

    output = modernize_release_notes(content)

    # NOTE(review): the fixture is assumed to be UTF-8; reading it explicitly
    # as such avoids depending on the platform's default locale encoding.
    with open(expected_path, encoding="utf-8") as expected_file:
        expected_output = expected_file.read().strip()

    assert output == expected_output
|