THIS FAILS IN libxml2 2.6.29 AND 2.6.30 !! >>> from lxml.html import fromstring, tostring >>> from lxml.html.clean import clean, clean_html, Cleaner >>> from lxml.html import usedoctest >>> def tostring(el): # work-around for Py3 'bytes' type ... from lxml.html import tostring ... s = tostring(el) ... if not isinstance(s, str): ... s = s.decode('UTF-8') ... return s >>> doc_embed = '''
... ... ... ... ...
''' >>> print(tostring(fromstring(doc_embed)))
>>> # NOTE(review): no expected output is visible after any of the prints
>>> # below -- confirm the expected markup is present in the real file.
>>> # Clean the fixture with a Cleaner constructed without arguments.
>>> print(Cleaner().clean_html(doc_embed))
>>> # Clean it again, passing 'www.youtube.com' as the only whitelisted host.
>>> print(Cleaner(host_whitelist=['www.youtube.com']).clean_html(doc_embed))
>>> # Same host whitelist, with whitelist_tags explicitly set to None.
>>> print(Cleaner(host_whitelist=['www.youtube.com'], whitelist_tags=None).clean_html(doc_embed))