Source code for hed.tools.visualization.tag_word_cloud

import numpy as np
from PIL import Image
from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud, generate_contour_svg


[docs]def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=None, **kwargs): """Takes a word dict and returns a generated word cloud object Parameters: word_dict(dict): words and their frequencies mask_path(str or None): The path of the mask file background_color(str or None): If None, transparent background. width(int): width in pixels height(int): height in pixels kwargs(kwargs): Any other parameters WordCloud accepts, overrides default values where relevant. Returns: word_cloud(WordCloud): The generated cloud. Use .to_file to save it out as an image. :raises ValueError: An empty dictionary was passed """ mask_image = None if mask_path: mask_image = load_and_resize_mask(mask_path, width, height) width = mask_image.shape[1] height = mask_image.shape[0] if height is None: if width is None: width = 400 height = width // 2 if width is None: width = height * 2 kwargs.setdefault('contour_width', 3) kwargs.setdefault('contour_color', 'black') kwargs.setdefault('prefer_horizontal', 0.75) kwargs.setdefault('color_func', default_color_func) kwargs.setdefault('relative_scaling', 1) kwargs.setdefault('max_font_size', height / 15) kwargs.setdefault('min_font_size', 5) wc = WordCloud(background_color=background_color, mask=mask_image, width=width, height=height, mode="RGBA", **kwargs) wc.generate_from_frequencies(word_dict) return wc
[docs]def word_cloud_to_svg(wc): """Takes word cloud and returns it as an SVG string. Parameters: wc(WordCloud): the word cloud object Returns: svg_string(str): The svg for the word cloud """ svg_string = wc.to_svg() svg_string = svg_string.replace("fill:", "fill:rgb") svg_string = svg_string.replace("</svg>", generate_contour_svg(wc, wc.width, wc.height) + "</svg>") return svg_string
[docs]def summary_to_dict(summary, transform=np.log10, adjustment=5): """Converts a HedTagSummary json dict into the word cloud input format Parameters: summary(dict): The summary from a summarize hed tags op transform(func): The function to transform the number of found tags Default log10 adjustment(int): Value added after transform. Returns: word_dict(dict): a dict of the words and their occurrence count :raises KeyError: A malformed dictionary was passed """ if transform is None: transform = lambda x: x overall_summary = summary.get("Overall summary", {}) specifics = overall_summary.get("Specifics", {}) tag_dict = specifics.get("Main tags", {}) word_dict = {} for tag_sub_list in tag_dict.values(): for tag_sub_dict in tag_sub_list: word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment return word_dict
[docs]def load_and_resize_mask(mask_path, width=None, height=None): """ Load a mask image and resize it according to given dimensions. The image is resized maintaining aspect ratio if only width or height is provided. Returns None if no mask_path. Parameters: mask_path (str): The path to the mask image file. width (int, optional): The desired width of the resized image. If only width is provided, the image is scaled to maintain its original aspect ratio. Defaults to None. height (int, optional): The desired height of the resized image. If only height is provided, the image is scaled to maintain its original aspect ratio. Defaults to None. Returns: numpy.ndarray: The loaded and processed mask image as a numpy array with binary values (0 or 255). """ if mask_path: mask_image = Image.open(mask_path) if width or height: original_size = np.array((mask_image.width, mask_image.height)) output_size = np.array((width, height)) # Handle one missing param if not height: scale = original_size[0] / width output_size = original_size / scale elif not width: scale = original_size[1] / height output_size = original_size / scale mask_image = mask_image.resize(output_size.astype(int), Image.LANCZOS) # Convert to greyscale then to binary black and white (0 or 255) mask_image = mask_image.convert('L') mask_image_array = np.array(mask_image) mask_image_array = np.where(mask_image_array > 127, 255, 0) else: mask_image_array = np.array(mask_image) return mask_image_array.astype(np.uint8)