# (Chilkat2-Python) Download HTML from URL and Convert to XML
# Downloads an HTML page from a URL and converts it to XML.
import sys
import chilkat2
# Unlock the Chilkat library before any other Chilkat object is used.
# Note: This example requires the Chilkat Bundle license.
# Any string argument automatically begins the 30-day trial.
glob = chilkat2.Global()
success = glob.UnlockBundle("30-day trial")
if not success:
    # Unlock failed: show Chilkat's diagnostic text and stop with a
    # non-zero exit status so a calling shell can detect the failure.
    print(glob.LastErrorText)
    sys.exit(1)
# Download the page body as a string.
http = chilkat2.Http()
html = http.QuickGetStr("http://www.intel.com/")
# QuickGetStr returns the content itself, so success is reported
# through the LastMethodSuccess property rather than the return value.
if not http.LastMethodSuccess:
    # Download failed: show the diagnostic text and exit non-zero.
    print(http.LastErrorText)
    sys.exit(1)
# Convert the downloaded HTML to XML and save it to disk.
htmlToXml = chilkat2.HtmlToXml()

# Charset used both for the generated XML and for the file written below.
# Keeping it in one place guarantees the two always match.
outputCharset = "utf-8"

# Indicate the charset of the output XML we'll want.
htmlToXml.XmlCharset = outputCharset

# Set the HTML:
htmlToXml.Html = html

# Convert to XML:
xml = htmlToXml.ToXml()
if not htmlToXml.LastMethodSuccess:
    # Conversion failed: do not write a bogus output file.
    print(htmlToXml.LastErrorText)
    sys.exit(1)

# Save the XML to a file, using the same charset as XmlCharset.
success = htmlToXml.WriteStringToFile(xml, "qa_output/out.xml", outputCharset)
if not success:
    # The write can fail (e.g. the output directory is missing); report it
    # instead of claiming the script finished successfully.
    print(htmlToXml.LastErrorText)
    sys.exit(1)

print("Finished.")
|