Skip to content

Commit cb89199

Browse files
authored
Merge pull request #50 from softwaresaved/vis-wordcloud
Produce word cloud from list of all definitions
2 parents fca2268 + 4cbb246 commit cb89199

5 files changed

Lines changed: 78 additions & 0 deletions

File tree

vis/definitions_wordcloud.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Code based on gallery example for wordcloud library found at:
2+
# https://amueller.github.io/word_cloud/auto_examples/
3+
# colored.html#sphx-glr-auto-examples-colored-py
4+
#
5+
# Image from:
6+
# https://society-rse.org/trademark-and-logo-policy/
7+
8+
# Requires libraries for the imports below, notably PIL, yaml and wordcloud
9+
10+
from os import path
11+
from PIL import Image
12+
import numpy as np
13+
import matplotlib.pyplot as plt
14+
import os
15+
16+
import yaml
17+
18+
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
19+
20+
# Directory containing this script. Falling back to getcwd() is needed to
# support running the example in a generated IPython notebook, where
# __file__ is not defined. The path is made absolute so that everything
# derived from it is independent of the current working directory.
d = path.dirname(path.abspath(__file__)) if "__file__" in globals() else os.getcwd()

# Path to the canonical YAML, anchored on the script directory rather than
# the current working directory so the script can be launched from anywhere.
defs_yaml_path = path.join(d, "..", "docs", "_data", "terms.yml")
25+
26+
27+
def parse_all_definitions():
    """Get all word definitions from the YAML definitions file.

    Loads the file at the module-level ``defs_yaml_path`` and collects the
    ``definition`` value of every entry that has one. The loaded data and the
    collected definitions are both printed to stdout.

    NOTE(review): assumes the YAML top level is a sequence of mappings --
    confirm against the terms file.

    Returns:
        list: The ``definition`` values in file order. Empty when the file
        is empty or no entry carries a ``definition`` key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding (presumably the terms file is UTF-8 -- confirm).
    with open(defs_yaml_path, "r", encoding="utf-8") as f:
        data = yaml.load(f, Loader=yaml.SafeLoader)

    # Print the values as loaded
    print("All YAML data is:", data)

    # An empty YAML document loads as None; treat that as "no entries".
    entries = data or []

    # Only mappings can carry a "definition" key. Without the isinstance
    # guard a plain string entry would be substring-tested and then fail
    # with TypeError on the lookup.
    all_defs = [
        item["definition"]
        for item in entries
        if isinstance(item, dict) and "definition" in item
    ]

    print("All definitions pulled from the YAML are:", all_defs)

    return all_defs
43+
44+
45+
def generate_wordcloud(text):
    """Generate a word cloud of the input text, masked and coloured by an image.

    The image ``images/rse_logo_crop_white_background.png`` under the script
    directory ``d`` is used both as the mask passed to ``WordCloud`` and as
    the colour source for recolouring the words. The result is saved to
    ``images/definitions_word_cloud.png`` under the same directory and then
    displayed with matplotlib.

    Args:
        text: The text to build the word cloud from.
    """
    images_dir = path.join(d, "images")

    # Read the mask / colour image. The pixels are copied into an array
    # inside the context manager so the file handle is closed promptly.
    with Image.open(
            path.join(images_dir, "rse_logo_crop_white_background.png")) as logo:
        logo_pixels = np.array(logo)

    # Default stopwords plus "said" (carried over from the gallery example
    # this script is based on).
    stopwords = set(STOPWORDS)
    stopwords.add("said")

    # random_state is fixed so repeated runs produce the same layout.
    wc = WordCloud(background_color="white", max_words=2000, mask=logo_pixels,
                   stopwords=stopwords, max_font_size=40, random_state=42)
    wc.generate(text)

    # Create colouring from the image
    image_colors = ImageColorGenerator(logo_pixels)

    fig, axes = plt.subplots(1, 1)
    # Recolour the word cloud and draw it; color_func=image_colors could
    # also be given directly in the WordCloud constructor.
    axes.imshow(wc.recolor(color_func=image_colors), interpolation="bilinear")
    axes.set_axis_off()

    # Save before showing: with an interactive backend plt.show() blocks
    # until the window is closed, after which the figure may already have
    # been torn down. The output goes next to the input image rather than
    # into whatever the current working directory happens to be.
    fig.savefig(path.join(images_dir, "definitions_word_cloud.png"))
    plt.show()
70+
71+
72+
if __name__ == "__main__":
    # Pull every definition from the YAML, merge them into one
    # space-separated string, and render that string as a word cloud.
    generate_wordcloud(" ".join(parse_all_definitions()))
60.7 KB
Loading

vis/images/rse_logo_colour.png

76.2 KB
Loading
63.8 KB
Loading
58 KB
Loading

0 commit comments

Comments
 (0)