#!/usr/bin/env python
"""Print the distinct values of the "attribute" key found in file.json.

The input is scanned line by line with a regular expression; it is not
parsed as JSON.  At most one value is taken from each line.
"""
import re

# Matches a line containing  "attribute":"<value>"  with at least one
# character before and after the pair (the two `.+` parts) and no whitespace
# around the colon.  Because the leading `.+` is greedy, the last such pair
# on a line is the one captured.
# NOTE(review): the '|' characters inside the character class are literal
# pipes, not alternation, so values containing '|' are accepted.  They are
# kept here to preserve the set of matched values -- confirm whether that
# is intended.
ATTRIBUTE_PATTERN = re.compile(
    r'^.+"attribute":"([a-z|A-Z|0-9|-]+)".+$', re.IGNORECASE)


def collect_attribute_ids(lines):
    """Return the set of distinct attribute values found in *lines*.

    lines -- any iterable of strings; a trailing newline on each item is
             stripped before matching.

    Lines that do not match ATTRIBUTE_PATTERN are ignored.
    """
    found = set()
    for line in lines:
        match = ATTRIBUTE_PATTERN.search(line.rstrip('\n'))
        if match is not None:
            found.add(match.group(1))
    return found


def main(path='file.json'):
    """Read *path* and print each distinct attribute value on its own line."""
    # The context manager guarantees the file is closed; iterating the
    # handle streams the file instead of loading every line up front.
    with open(path) as handle:
        unique_ids = collect_attribute_ids(handle)

    # Set iteration order is arbitrary, so sort to make the output
    # reproducible from run to run.
    for value in sorted(unique_ids):
        print(value)


if __name__ == '__main__':
    main()
The code collects the values of the "attribute" key from each line of file.json and prints each distinct value once.