mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
14 lines
479 B
Python
14 lines
479 B
Python
from bs4 import BeautifulSoup
|
|
|
|
|
|
def extract_content(html: str) -> str:
    """Return the text of *html* with non-visible elements and blank lines removed.

    The markup is parsed with the ``html.parser`` backend. Elements whose tag
    name is in the non-visible list are removed from the tree together with
    their contents, the remaining text is extracted with newline separators,
    and every line is stripped of surrounding whitespace. Lines that are
    empty after stripping are dropped; the rest are joined with ``"\\n"``.
    """
    document = BeautifulSoup(html, "html.parser")

    # Remove these elements before extracting text so their contents never
    # reach the output.
    hidden_tag_names = ["style", "script", "head", "meta", "[document]"]
    for element in document.find_all(hidden_tag_names):
        element.decompose()

    raw_text = document.get_text(separator="\n")

    # Trim every line and keep only those that still have content.
    kept_lines = [
        trimmed for trimmed in map(str.strip, raw_text.splitlines()) if trimmed
    ]
    return "\n".join(kept_lines)
|