#!/usr/bin/env python
import re
# Matches a line containing an `"attribute":"<value>"` pair and captures
# <value>. The value may consist of ASCII letters, digits and hyphens only;
# the earlier class `[a-z|A-Z|0-9|-]` also accepted a literal `|`, because
# `|` is not an alternation operator inside a character class.
# The leading `.+` and trailing `.+` are kept from the original pattern, so
# at least one character must precede and follow the pair on the line, and no
# whitespace is allowed around the colon. IGNORECASE is kept so the key itself
# is matched case-insensitively.
_ATTRIBUTE_RE = re.compile(
    r'^.+"attribute":"([A-Za-z0-9-]+)".+$',
    re.IGNORECASE,
)


def extract_ids(lines):
    """Return the set of distinct ``"attribute"`` values found in *lines*.

    Args:
        lines: Iterable of text lines; a trailing newline on each line is
            ignored.

    Returns:
        A set with the captured value of every line that matches
        ``_ATTRIBUTE_RE``. Lines that do not match are skipped. Because the
        leading ``.+`` is greedy, the last matching pair on a line is the one
        captured when a line holds several.
    """
    found = set()
    for line in lines:
        match = _ATTRIBUTE_RE.search(line.rstrip('\n'))
        if match is not None:
            found.add(match.group(1))
    return found


def main(path='file.json'):
    """Print each distinct ``"attribute"`` value found in *path*, one per line.

    Args:
        path: File to scan line by line; defaults to ``file.json``.
    """
    # The context manager closes the file even if reading fails; the file is
    # streamed rather than loaded into a list first.
    with open(path) as handle:
        unique_ids = extract_ids(handle)
    # Sets have no defined order; sort so repeated runs print identically.
    for value in sorted(unique_ids):
        print(value)


if __name__ == '__main__':
    main()
# This script collects the distinct string values of the "attribute" key found in file.json and prints each value once.