Sort by

recency

|

106 Discussions

|

3 months ago+ 0 comments

Interesting approach! Just curious — how are you handling self-closing tags like `<br/>` or `<img/>`, and attributes that might appear in different orders? Are you using a parser like HTMLParser or regular expressions for extraction?

Regards, https://halfbirthdaycal.com/

10 months ago+ 0 comments

The exact code will be:

import re

import re
import sys

# Opening or self-closing tag: group 1 is the tag name, group 2 the raw
# attribute text. Quoted values are consumed as whole units so a ">" inside a
# value does not end the tag; the character classes also match newlines, so a
# tag may span several input lines.
TAG_PATTERN = re.compile(r'''<(\w+)((?:"[^"]*"|'[^']*'|[^>"'])*)>''')

# One name=value attribute: group 1 is the attribute name. The value (double
# quoted, single quoted or unquoted) is consumed together with the name, so
# "key=value" text inside a value, such as a URL query string, is never
# reported as an attribute.
ATTR_PATTERN = re.compile(
    r'''([^\s=<>"'/]+)\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]+)''')


# Read input

def html_from_input(text):
    """Return the HTML fragment contained in the raw program input.

    The first line of ``text`` is the number N of HTML lines that follow;
    only those N lines are kept. Lines are joined with a newline rather than
    concatenated directly, so a tag name at the end of one line cannot fuse
    with the first token of the next line.

    Returns an empty string for empty input. Raises ValueError if the first
    line is not an integer.
    """
    lines = text.splitlines()
    if not lines:
        return ''
    count = int(lines[0])
    return '\n'.join(lines[1:count + 1])


# Extract tags and attributes using regex

def extract_tags(html):
    """Map every opening tag name in ``html`` to the set of its attribute names.

    Tags without any name=value attribute are still recorded, with an empty
    set. Closing tags are ignored because "</" is never followed directly by
    a word character.
    """
    tags = {}
    for match in TAG_PATTERN.finditer(html):
        tag, attrs = match.group(1), match.group(2)
        tags.setdefault(tag, set()).update(ATTR_PATTERN.findall(attrs))
    return tags


# Print output

def format_tags(tags):
    """Return one "tag:attr1,attr2" line per tag, tags and attributes sorted.

    A tag with no attributes is rendered as "tag:".
    """
    return ['%s:%s' % (tag, ','.join(sorted(tags[tag])))
            for tag in sorted(tags)]


if __name__ == '__main__':
    for line in format_tags(extract_tags(html_from_input(sys.stdin.read()))):
        print(line)

1 year ago+ 8 comments

Java 15

A TreeMap maps each tag name to a sorted set (TreeSet) of its attribute names.

Regex for tag : `<(\w+).*?>`

Means "open_tag + tag_name + (anything before finding close_tag) +close_tag".
Here tag_name consists of word chars only.

Regex for attribute :

Means : (space)attribute_name="attr_value"
We search for the tag attributes within the full_tag only.
attr_values can be enclosed within single or double quotes.

import java.io.*;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Reads a line count N followed by N lines of HTML from standard input and
 * prints, for every opening tag found, one line of the form
 * {@code tag:attr1,attr2}. Tags are printed in sorted order and so are the
 * attribute names of each tag; a tag without attributes prints as {@code tag:}.
 */
public class Solution {
    /** Opening or self-closing tag on a single line; group 1 is the tag name. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<(\\w+).*?>");

    /**
     * One {@code name="value"} or {@code name='value'} attribute preceded by
     * whitespace; group 1 is the attribute name. The name may not contain
     * whitespace, quotes, '=', '/', '<' or '>', and the value must be closed
     * by the same quote character that opened it, so text inside a value
     * (for example an apostrophe in a double-quoted title) can never be
     * mistaken for part of an attribute name.
     */
    private static final Pattern ATTR_PATTERN =
        Pattern.compile("\\s([^\\s=\"'<>/]+)\\s*=\\s*(?:\"[^\"]*\"|'[^']*')");

    /**
     * Records every tag found in {@code line}, together with its attribute
     * names, into {@code attributesByTag}. A tag without attributes is still
     * recorded, with an empty set.
     */
    private static void collectAttributes(String line, Map<String, TreeSet<String>> attributesByTag) {
        Matcher tagMatcher = TAG_PATTERN.matcher(line);
        while (tagMatcher.find()) {
            String fullTag = tagMatcher.group(0);
            String tagName = tagMatcher.group(1);
            TreeSet<String> attributes =
                attributesByTag.computeIfAbsent(tagName, k -> new TreeSet<String>());
            // Attributes are searched only inside the text of this one tag.
            Matcher attrMatcher = ATTR_PATTERN.matcher(fullTag);
            while (attrMatcher.find()) {
                attributes.add(attrMatcher.group(1));
            }
        }
    }

    public static void main(String[] args) {
        Scanner scanner = new Scanner(System.in);
        int n = Integer.parseInt(scanner.nextLine().trim());
        TreeMap<String, TreeSet<String>> attributesByTag = new TreeMap<String, TreeSet<String>>();
        // Stop early instead of throwing if the input has fewer than n lines.
        for (int i = 0; i < n && scanner.hasNextLine(); i++) {
            collectAttributes(scanner.nextLine(), attributesByTag);
        }
        for (Map.Entry<String, TreeSet<String>> entry : attributesByTag.entrySet()) {
            System.out.println(entry.getKey() + ":" + String.join(",", entry.getValue()));
        }
    }
}

View Thread

1 year ago+ 0 comments

Python 3

import re
import sys
# Opening or self-closing tag: group 1 is the tag name, group 2 the raw
# attribute text. Quoted values are consumed as whole units so a ">" inside a
# value does not end the tag; the character classes also match newlines, so a
# tag may span several input lines.
TAG_RE = re.compile(r'''<(\w+)((?:"[^"]*"|'[^']*'|[^>"'])*)>''')

# One name=value attribute: group 1 is the attribute name. The value (double
# quoted, single quoted or unquoted) is consumed together with the name, so
# "key=value" text inside a value is never reported as an attribute.
ATTR_RE = re.compile(r'''([^\s=<>"'/]+)\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]+)''')


def collect_tag_attributes(html: str) -> dict:
    """Map every opening tag name in ``html`` to the set of its attribute names.

    A tag is recorded even when it has no name=value attribute (its set is
    then empty) and even when it carries hyphenated names such as ``data-id``
    or valueless attributes such as ``disabled``; valueless attributes
    themselves are not reported. Closing tags are ignored because "</" is
    never followed directly by a word character.
    """
    tag_attrs = {}
    for match in TAG_RE.finditer(html):
        name, attr_text = match.group(1), match.group(2)
        tag_attrs.setdefault(name, set()).update(ATTR_RE.findall(attr_text))
    return tag_attrs


def format_tag_attributes(tag_attrs: dict) -> list:
    """Return one "tag:attr1,attr2" line per tag, tags and attributes sorted.

    A tag with no attributes is rendered as "tag:".
    """
    return [f"{name}:{','.join(sorted(tag_attrs[name]))}"
            for name in sorted(tag_attrs)]


def main() -> None:
    """Read the HTML from standard input and print the tag summary."""
    # The first line is the line count; it is not needed because the rest of
    # the input is read through to end-of-file.
    sys.stdin.readline()
    html = sys.stdin.read()
    for line in format_tag_attributes(collect_tag_attributes(html)):
        print(line)


if __name__ == "__main__":
    main()