# Count the words in romeo.txt and print the total, the number of distinct
# words and the distinct words in case-insensitive alphabetical order.
import re

# Matches every character that is not a word character, whitespace or an
# apostrophe.  Apostrophes are kept so that "window's" stays one word;
# whitespace is kept so that it can still act as the word separator.
PUNCTUATION = re.compile(r"[^\w\s']")


def extract_words(lines):
    """Return all words found in *lines*, in order of appearance.

    Args:
        lines: An iterable of strings, e.g. an open text file.

    Returns:
        A list of words with punctuation removed.  Case is preserved and
        duplicates are kept, so ``len()`` of the result is the word count.
    """
    found = []
    for line in lines:
        # split() without an argument splits on any run of whitespace and
        # never yields empty strings, so blank lines, doubled spaces and
        # the trailing newline do not show up as bogus "words".
        found.extend(PUNCTUATION.sub("", line).split())
    return found


def main(path="romeo.txt"):
    """Print word statistics for the text file at *path*."""
    with open(path, "r") as f:
        found = extract_words(f)
    unique = set(found)
    # And/and, But/but, Her/her count as different words
    print("{:13} {}".format("Words found:", len(found)))
    print("{:13} {}".format("Unique words:", len(unique)))
    # str.casefold sorts case-insensitively, like str.lower; the word itself
    # is the tie-breaker so that e.g. "And"/"and" come out in the same order
    # on every run instead of depending on set iteration order.
    ordered = sorted(unique, key=lambda word: (word.casefold(), word))
    print("\n{}\n{}".format("Sorted unique list:", ordered))


if __name__ == "__main__":
    main()
#M# file open/read/parse, regexp, regular expression, extend list, unique items list, print format
But soft! What light through yonder window's breaks?
It is the east, and Juliet is the sun.
Arise, fair sun, and kill the envious moon,
Who is already sick and pale with grief,
That thou, her maid, art far more fair than she.
Be not her maid since she is envious.
Her vestal livery is but sick and green,
And none but fools do wear it. Cast it off!