Detect HTML Attributes

  • + 0 comments

    Python 3

    import re
    import sys
    # Grammar for one attribute inside a start tag.  A name is any run of
    # characters that cannot end a name; a value is double-quoted, single-quoted
    # or an unquoted run.  Quoted values are consumed as a unit, so text inside
    # them (including "word=" sequences, ">" and newlines) is never mistaken for
    # markup.
    _NAME = r"""[^\s"'<>/=]+"""
    _VALUE = r"""(?:"[^"]*"|'[^']*'|[^\s"'<>=`]+)"""
    _ATTRIBUTE = _NAME + r"(?:\s*=\s*" + _VALUE + r")?"

    # Same grammar with the name captured, used to list the names of a tag.
    _ATTR_RE = re.compile(r"\s+(" + _NAME + r")(?:\s*=\s*" + _VALUE + r")?")

    # A start tag: "<name", zero or more whitespace-separated attributes, then an
    # optional "/" and the closing ">".  Group 1 is the tag name, group 2 the raw
    # attribute text.  Closing tags ("</p>") never match because "/" is not a
    # word character.
    _TAG_RE = re.compile(r"<(\w+)((?:\s+" + _ATTRIBUTE + r")*)\s*/?>")


    def detect_attributes(html):
        """Collect the attribute names used by every start tag in *html*.

        Args:
            html: HTML text to scan; it may span several lines.

        Returns:
            A dict mapping each tag name to the set of attribute names seen on
            any occurrence of that tag.  A tag that never carries an attribute
            maps to an empty set.  Names keep the case used in the input.
        """
        found = {}
        for tag, raw_attributes in _TAG_RE.findall(html):
            # Only the names are collected; the values are matched by _ATTR_RE
            # so they are skipped rather than rescanned for "name=" look-alikes.
            found.setdefault(tag, set()).update(_ATTR_RE.findall(raw_attributes))
        return found


    def format_report(attributes_by_tag):
        """Render a tag -> attribute-names mapping as sorted "tag:a,b" lines.

        Args:
            attributes_by_tag: mapping as returned by detect_attributes().

        Returns:
            A list with one string per tag, ordered by tag name; the attribute
            names within each line are sorted and comma-separated.
        """
        return [
            tag + ":" + ",".join(sorted(attributes_by_tag[tag]))
            for tag in sorted(attributes_by_tag)
        ]


    def main():
        """Read the exercise input from stdin and print the report to stdout."""
        # The first line holds the number of HTML lines.  It is skipped without
        # being parsed because the rest of the input is read up to end-of-file.
        sys.stdin.readline()
        for line in format_report(detect_attributes(sys.stdin.read())):
            print(line)


    if __name__ == "__main__":
        main()