Source code for pysentimentanalyzer.sentiment_score_plot

# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pysentimentanalyzer.helper import *


def sentiment_score_plot(df, col):
    """
    Plot a binned histogram of the sentiment scores of a review column.

    The reviews are scored by ``get_sentiment_and_score`` and the resulting
    ``compound_score`` values are drawn as a 10-bin count histogram. A
    one-line summary of the most populated bin (its count, its score range
    and its share of all samples) is printed to stdout.

    Parameters
    ----------
    df : Pandas DataFrame
        DataFrame containing the reviews
    col : String
        Name of the column that contains the reviews

    Returns
    -------
    Plot : matplotlib Figure
        The figure holding the histogram.

    Raises
    ------
    TypeError
        If ``df`` is not a Pandas DataFrame or ``col`` is not a string.
    ValueError
        If ``col`` is not a column of ``df``.

    Examples
    --------
    >>> plot = sentiment_score_plot(df, "column_name")
    >>> plt.show()
    """
    # Validate the inputs before any scoring or plotting work is done.
    if not isinstance(df, pd.DataFrame):
        raise TypeError("The first parameter should be Pandas Dataframe.")
    if not isinstance(col, str):
        raise TypeError("The second parameter should be string dtype")
    if col not in list(df.columns):
        # ValueError is still caught by callers that handle ``Exception``.
        raise ValueError("The second parameter is not a column in the dataframe")

    # The helper is expected to return a frame with a "compound_score"
    # column (NOTE(review): its exact schema is defined in the helper module).
    data = get_sentiment_and_score(df, col)
    scores = data["compound_score"]

    # Draw on an explicit Axes rather than relying on pyplot's implicit
    # "current axes", so the result does not depend on global pyplot state.
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(1, 1, 1)
    counts, edges, _ = ax.hist(
        scores, density=False, bins=10, label="Sentiment Counts"
    )
    ax.legend(loc="upper right")
    ax.set_ylabel("Count")
    ax.set_xlabel("Sentiment Score range")
    ax.set_title("Distribution of Sentiments")

    # Histogram metrics: locate the fullest bin once and reuse the index.
    peak = counts.argmax()
    max_counts = counts[peak]
    total = len(scores)
    # Guard against an empty score column, which would otherwise be 0 / 0.
    max_percentage = max_counts / total if total else 0.0

    print(
        f"The highest frequency ({max_counts}) of sentiment scores are in range {edges[peak]:.1f} to {edges[peak + 1]:.1f} which forms {max_percentage*100:.02f}% of samples in the dataset"
    )

    return fig