#!pip install seaborn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
#shows graphic without 'plt.show()'
%matplotlib inline
# Load the seaborn example datasets used throughout this notebook and
# preview the first rows of each one.
tips = sns.load_dataset("tips")
tips.head()
flights = sns.load_dataset("flights")
flights.head()
iris = sns.load_dataset("iris")
iris.head()
sns.relplot(x=None, y=None, hue=None, size=None, style=None, data=None, row=None, col=None, col_wrap=None, row_order=None, col_order=None, palette=None, hue_order=None, hue_norm=None, sizes=None, size_order=None, size_norm=None, markers=None, dashes=None, style_order=None, legend='brief', kind='scatter', height=5, aspect=1, facet_kws=None, **kwargs)
This function provides access to several different axes-level functions that show the relationship between two variables with semantic mappings of subsets. The kind parameter selects the underlying axes-level function to use:
scatterplot() (with kind="scatter"; the default)
lineplot() (with kind="line")
# Tip against total bill, with `smoker` mapped to the hue semantic
sns.relplot(data=tips, x="total_bill", y="tip", hue="smoker");
# Same plot, additionally mapping `time` to the style semantic
sns.relplot(data=tips, x="total_bill", y="tip", hue="smoker", style="time");
sns.scatterplot(x=None, y=None, hue=None, style=None, size=None, data=None, palette=None, hue_order=None, hue_norm=None, sizes=None, size_order=None, size_norm=None, markers=True, style_order=None, x_bins=None, y_bins=None, units=None, estimator=None, ci=95, n_boot=1000, alpha='auto', x_jitter=None, y_jitter=None, legend='brief', ax=None, **kwargs)
The most familiar way to visualize a bivariate distribution is a scatterplot, where each observation is shown with a point at the x and y values. This is analogous to a rug plot on two dimensions. You can draw a scatterplot with scatterplot(), and it is also the default kind of plot shown by the jointplot() function:
# Axes-level scatter plot of tip against total bill
sns.scatterplot(data=tips, x="total_bill", y="tip")
sns.lineplot(x=None, y=None, hue=None, size=None, style=None, data=None, palette=None, hue_order=None, hue_norm=None, sizes=None, size_order=None, size_norm=None, dashes=True, markers=None, style_order=None, units=None, estimator='mean', ci=95, n_boot=1000, seed=None, sort=True, err_style='band', err_kws=None, legend='brief', ax=None, **kwargs)
The relationship between x and y can be shown for different subsets of the data using the hue, size, and style parameters. These parameters control what visual semantics are used to identify the different subsets. It is possible to show up to three dimensions independently by using all three semantic types, but this style of plot can be hard to interpret and is often ineffective. Using redundant semantics (i.e. both hue and style for the same variable) can be helpful for making graphics more accessible.
# Line plots on the "fmri" example dataset
fmri = sns.load_dataset("fmri")
sns.lineplot(data=fmri, x="timepoint", y="signal")
# Redundant semantics: `event` drives both hue and style
sns.lineplot(data=fmri, x="timepoint", y="signal", hue="event", style="event")
# Two different variables: `region` as hue, `event` as style
sns.lineplot(data=fmri, x="timepoint", y="signal", hue="region", style="event")
# "dots" example dataset, restricted to the rows where align == 'dots'
dots = sns.load_dataset("dots").query("align == 'dots'")
sns.lineplot(
    data=dots,
    x="time",
    y="firing_rate",
    hue="choice",
    size="coherence",
    sizes=(.25, 2.5),
)
# Figure-level line plot: one facet per subject (frontal region only),
# wrapping to a new row after every 5 columns
sns.relplot(
    data=fmri.query("region == 'frontal'"),
    kind="line",
    x="timepoint",
    y="signal",
    hue="event",
    style="event",
    col="subject",
    col_wrap=5,
    height=3,
    aspect=.75,
    linewidth=2.5,
)
sns.regplot(x, y, data=None, x_estimator=None, x_bins=None, x_ci='ci', scatter=True, fit_reg=True, ci=95, n_boot=1000, units=None, seed=None, order=1, logistic=False, lowess=False, robust=False, logx=False, x_partial=None, y_partial=None, truncate=True, dropna=True, x_jitter=None, y_jitter=None, label=None, color=None, marker='o', scatter_kws=None, line_kws=None, ax=None)
Plot data and a linear regression model fit. There are a number of mutually exclusive options for estimating the regression model.
# Scatter plot of tip against total bill with a fitted linear regression, in green.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so passing the two vectors positionally raises TypeError.
sns.regplot(x="total_bill", y="tip", data=tips, color="g")
sns.lmplot(x, y, data, hue=None, col=None, row=None, palette=None, col_wrap=None, height=5, aspect=1, markers='o', sharex=True, sharey=True, hue_order=None, col_order=None, row_order=None, legend=True, legend_out=True, x_estimator=None, x_bins=None, x_ci='ci', scatter=True, fit_reg=True, ci=95, n_boot=1000, units=None, seed=None, order=1, logistic=False, lowess=False, robust=False, logx=False, x_partial=None, y_partial=None, truncate=True, x_jitter=None, y_jitter=None, scatter_kws=None, line_kws=None, size=None)
This function combines regplot() and FacetGrid. It is intended as a convenient interface to fit regression models across conditional subsets of a dataset.
When thinking about how to assign variables to different facets, a general rule is that it makes sense to use hue for the most important comparison, followed by col and row. However, always think about your particular dataset and the goals of the visualization you are creating.
# Single regression fit of tip on total bill
sns.lmplot(data=tips, x="total_bill", y="tip")
# Separate fits per smoker status, with explicit markers and colours
sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="smoker",
    markers=["o", "x"],
    palette={"Yes": "g", "No": "m"},
)
# One facet per day (also coloured by day), two facets per row
g = sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    col="day",
    col_wrap=2,
    height=3,
)
sns.distplot(a, bins=None, hist=True, kde=True, rug=False, fit=None, hist_kws=None, kde_kws=None, rug_kws=None, fit_kws=None, color=None, vertical=False, norm_hist=False, axlabel=None, label=None, ax=None)
The most convenient way to take a quick look at a univariate distribution in seaborn is the distplot() function. By default, this will draw a histogram and fit a kernel density estimate (KDE).
# Histogram with a KDE overlay of the bill totals (distplot defaults).
# NOTE(review): distplot() is deprecated since seaborn 0.11 -- consider
# migrating to histplot()/displot().
sns.distplot(tips["total_bill"])
A histogram represents the distribution of data by forming bins along the range of the data and then drawing bars to show the number of observations that fall in each bin.
When drawing histograms, the main choice you have is the number of bins to use and where to place them. distplot() uses a simple rule to make a good guess for what the right number is by default, but trying more or fewer bins might reveal other features in the data:
# Histogram only: drop the KDE layer and use 20 bins
sns.distplot(tips["total_bill"], bins=20, kde=False)
sns.kdeplot(data, data2=None, shade=False, vertical=False, kernel='gau', bw='scott', gridsize=100, cut=3, clip=None, legend=True, cumulative=False, shade_lowest=True, cbar=False, cbar_ax=None, cbar_kws=None, ax=None, **kwargs)
sns.rugplot(a, height=0.05, axis='x', ax=None, **kwargs)
Like the histogram, the KDE plots encode the density of observations on one axis with height along the other axis. kdeplots are Kernel Density Estimation plots. These KDE plots replace every single observation with a Gaussian (Normal) distribution centered around that value.
# Create dataset: 50 samples drawn with np.random.randn
x = np.random.randn(50)
# Filled density curve. `fill=True` replaces the deprecated `shade=True`,
# which current seaborn only accepts with a deprecation warning.
sns.kdeplot(x, fill=True)
# Mark each individual observation along the x axis
sns.rugplot(x)
Drawing a KDE is more computationally involved than drawing a histogram. What happens is that each observation is first replaced with a normal (Gaussian) curve centered at that value:
# Create another rugplot
sns.rugplot(x);

# Set up the x-axis for the plot: pad the data range by 2 on each side
x_min = x.min() - 2
x_max = x.max() + 2

# 100 equally spaced points from x_min to x_max
x_axis = np.linspace(x_min, x_max, 100)

# Rule-of-thumb bandwidth, see
# http://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
bandwidth = ((4 * x.std() ** 5) / (3 * len(x))) ** .2

# Collect the unscaled kernels; they are summed later to build the KDE
kernel_list = []

# Plot each basis function. The loop body must be indented: the flattened
# version of this cell had lost its indentation and did not parse.
for data_point in x:
    # Create a normal kernel centred on this observation and keep it
    kernel = stats.norm(data_point, bandwidth).pdf(x_axis)
    kernel_list.append(kernel)

    # Scale for plotting only (peak height .4); the stored kernel is unscaled
    kernel = kernel / kernel.max()
    kernel = kernel * .4
    plt.plot(x_axis, kernel, color='r', alpha=0.5)

plt.ylim(0, 1)
Next, these curves are summed to compute the value of the density at each point in the support grid. The resulting curve is then normalized so that the area under it is equal to 1:
# Sum the individual kernels point-wise along the x grid
sum_of_kde = np.sum(kernel_list, axis=0)
# Plot the summed curve
fig = plt.plot(x_axis, sum_of_kde, color='indianred')
# Add the initial rugplot. Use `color=`: the short alias `c=` is not a valid
# keyword for the artists that current seaborn's rugplot() creates.
sns.rugplot(x, color='indianred')
# Get rid of y-tick marks
plt.yticks([])
# Set title
plt.suptitle("Sum of the Basis Functions")
We can see that if we use the kdeplot() function in seaborn, we get the same curve. This function is used by distplot(), but it provides a more direct interface with easier access to other options when you just want the density estimate:
sns.jointplot(x, y, data=None, kind='scatter', stat_func=None, color=None, height=6, ratio=5, space=0.2, dropna=True, xlim=None, ylim=None, joint_kws=None, marginal_kws=None, annot_kws=None, **kwargs)
sns.JointGrid(x, y, data=None, height=6, ratio=5, space=0.2, dropna=True, xlim=None, ylim=None, size=None)
It can also be useful to visualize a bivariate distribution of two variables. The easiest way to do this in seaborn is to just use the jointplot() function, which creates a multi-panel figure that shows both the bivariate (or joint) relationship between two variables along with the univariate (or marginal) distribution of each on separate axes.
# Scatter plot in the joint axes (the default kind)
sns.jointplot(data=tips, x="total_bill", y="tip", kind="scatter")
# Regression variant (kind="reg")
sns.jointplot(data=tips, x="total_bill", y="tip", kind="reg")
# Residual variant (kind="resid")
sns.jointplot(data=tips, x="total_bill", y="tip", kind="resid")
A bivariate analogue of a histogram is known as a “hexbin” plot, because it shows the counts of observations that fall within hexagonal bins. This plot works best with relatively large datasets. It’s available in matplotlib as matplotlib.axes.Axes.hexbin() and as a style in jointplot().
# Hexbin variant of the joint plot
sns.jointplot(data=tips, x="total_bill", y="tip", kind="hex")
It is also possible to use the kernel density estimation procedure described above to visualize a bivariate distribution.
You can also draw a two-dimensional kernel density plot with the kdeplot() function.
# Two-dimensional KDE of (total_bill, tip) with a rug on each axis.
f, ax = plt.subplots(figsize=(6, 6))
# The two vectors are passed as x=/y=: seaborn >= 0.12 accepts only `data`
# positionally, so the old kdeplot(a, b) call form raises TypeError.
sns.kdeplot(x=tips["total_bill"], y=tips["tip"], ax=ax)
sns.rugplot(x=tips["total_bill"], color="g", ax=ax)
# y= replaces the deprecated `vertical=True`
sns.rugplot(y=tips["tip"], ax=ax);
# KDE variant of the joint plot
sns.jointplot(data=tips, x="total_bill", y="tip", kind="kde")
# Same plot in red, with the observations overlaid as white "+" markers
g = sns.jointplot(data=tips, x="total_bill", y="tip", kind="kde", color="red")
g.plot_joint(plt.scatter, marker="+", c="w", s=30, linewidth=1)
# Make the first collection on the joint axes fully transparent
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("$X$", "$Y$");
sns.pairplot(data, hue=None, hue_order=None, palette=None, vars=None, x_vars=None, y_vars=None, kind='scatter', diag_kind='auto', markers=None, height=2.5, aspect=1, corner=False, dropna=True, plot_kws=None, diag_kws=None, grid_kws=None, size=None)
sns.PairGrid(data, hue=None, hue_order=None, palette=None, hue_kws=None, vars=None, x_vars=None, y_vars=None, corner=False, diag_sharey=True, height=2.5, aspect=1, layout_pad=0, despine=True, dropna=True, size=None)
Pairplot will plot pairwise relationships across an entire dataframe (for the numerical columns) and supports a color hue argument (for categorical columns).
This creates a matrix of axes and shows the relationship for each pair of columns in a DataFrame. By default, it also draws the univariate distribution of each variable on the diagonal Axes:
# Pairwise relationships between the quantitative columns of `tips`
sns.pairplot(data=tips)
Specifying the hue parameter automatically changes the histograms to KDE plots to facilitate comparisons between multiple distributions.
# Same pairwise plot, coloured by the qualitative column `sex`
sns.pairplot(data=tips, hue="sex", palette="coolwarm")
Much like the relationship between jointplot() and JointGrid, the pairplot() function is built on top of a PairGrid object, which can be used directly for more flexibility:
# Build the grid by hand: KDE on the diagonal, 6-level KDE off the diagonal
g = sns.PairGrid(data=tips)
g.map_diag(sns.kdeplot)
g.map_offdiag(sns.kdeplot, n_levels=6);
sns.catplot(x=None, y=None, hue=None, data=None, row=None, col=None, col_wrap=None, estimator=<function mean at 0x105c7d9e0>, ci=95, n_boot=1000, units=None, seed=None, order=None, hue_order=None, row_order=None, col_order=None, kind='strip', height=5, aspect=1, orient=None, color=None, palette=None, legend=True, legend_out=True, sharex=True, sharey=True, margin_titles=False, facet_kws=None, **kwargs)
The default representation of the data in catplot() uses a scatterplot.
This function provides access to several axes-level functions that show the relationship between a numerical and one or more categorical variables using one of several visual representations. The kind parameter selects the underlying axes-level function to use:
Categorical estimate plots:
pointplot() (with kind="point")
barplot() (with kind="bar")
countplot() (with kind="count")
Categorical distribution plots:
boxplot() (with kind="box")
violinplot() (with kind="violin")
boxenplot() (with kind="boxen")
Categorical scatterplots:
stripplot() (with kind="strip"; the default)
swarmplot() (with kind="swarm")
sns.pointplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, estimator=<function mean at 0x105c7d9e0>, ci=95, n_boot=1000, units=None, seed=None, markers='o', linestyles='-', dodge=False, join=True, scale=1, orient=None, color=None, palette=None, errwidth=None, capsize=None, ax=None, **kwargs)
A point plot represents an estimate of central tendency for a numeric variable by the position of scatter plot points and provides some indication of the uncertainty around that estimate using error bars.
Point plots can be more useful than bar plots for focusing comparisons between different levels of one or more categorical variables. They are particularly adept at showing interactions: how the relationship between levels of one categorical variable changes across levels of a second categorical variable. The lines that join each point from the same hue level allow interactions to be judged by differences in slope, which is easier for the eyes than comparing the heights of several groups of points or bars.
It is important to keep in mind that a point plot shows only the mean (or other estimator) value, but in many cases it may be more informative to show the distribution of values at each level of the categorical variables. In that case, other approaches such as a box or violin plot may be more appropriate.
# Switch to the "darkgrid" style, then draw total_bill per meal time
sns.set(style="darkgrid")
ax = sns.pointplot(data=tips, x="time", y="total_bill")
sns.barplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, estimator=<function mean at 0x105c7d9e0>, ci=95, n_boot=1000, units=None, seed=None, orient=None, color=None, palette=None, saturation=0.75, errcolor='.26', errwidth=None, capsize=None, dodge=True, ax=None, **kwargs)
Bar plot is a general plot that allows you to aggregate the categorical data based off some function, by default the mean.
# Default estimator (mean) of total_bill per sex
sns.barplot(data=tips, x="sex", y="total_bill")
# Same bars, aggregated with the standard deviation instead
sns.barplot(data=tips, x="sex", y="total_bill", estimator=np.std)
sns.countplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, dodge=True, ax=None, **kwargs)
A count plot can be thought of as a histogram across a categorical, instead of quantitative, variable. This is essentially the same as barplot except the estimator is explicitly counting the number of occurrences, which is why we only pass the x value:
# Number of rows per category of `sex`
sns.countplot(data=tips, x="sex")
sns.boxplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, width=0.8, dodge=True, fliersize=5, linewidth=None, whis=1.5, ax=None, **kwargs)
A box plot (or box-and-whisker plot) shows the distribution of quantitative data in a way that facilitates comparisons between variables or across levels of a categorical variable. The box shows the quartiles of the dataset while the whiskers extend to show the rest of the distribution, except for points that are determined to be “outliers” using a method that is a function of the inter-quartile range.
# Distribution of total_bill for each day
sns.boxplot(data=tips, x="day", y="total_bill", palette="rainbow")
# The whole dataframe can be plotted at once, horizontally, with orient='h'
sns.boxplot(data=tips, orient="h", palette="rainbow")
# A second categorical variable can be used for colouring
sns.boxplot(data=tips, x="day", y="total_bill", hue="smoker", palette="coolwarm")
sns.boxenplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, width=0.8, dodge=True, k_depth='proportion', linewidth=None, scale='exponential', outlier_prop=None, showfliers=True, ax=None, **kwargs)
This style of plot was originally named a “letter value” plot because it shows a large number of quantiles that are defined as “letter values”. It is similar to a box plot in plotting a nonparametric representation of a distribution in which all features correspond to actual observations. By plotting more quantiles, it provides more information about the shape of the distribution, particularly in the tails.
# Letter-value ("boxen") plot of total_bill for each day
sns.boxenplot(data=tips, x="day", y="total_bill")
seaborn.violinplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, bw='scott', cut=2, scale='area', scale_hue=True, gridsize=100, width=0.8, inner='box', split=False, dodge=True, orient=None, linewidth=None, color=None, palette=None, saturation=0.75, ax=None, **kwargs)
A violin plot plays a similar role as a box plot. It shows the distribution of quantitative data across several levels of one (or more) categorical variables such that those distributions can be compared. Unlike a box plot, in which all of the plot components correspond to actual datapoints, the violin plot features a kernel density estimation of the underlying distribution.
# One categorical variable
sns.violinplot(data=tips, x="day", y="total_bill", palette="rainbow")
# A second categorical variable as hue
sns.violinplot(data=tips, x="day", y="total_bill", hue="sex", palette="Set1")
# split=True for a direct comparison on the second variable
sns.violinplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    split=True,
    palette="Set1",
)
sns.stripplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, jitter=True, dodge=False, orient=None, color=None, palette=None, size=5, edgecolor='gray', linewidth=0, ax=None, **kwargs)
The stripplot will draw a scatterplot where one variable is categorical. A strip plot can be drawn on its own, but it is also a good complement to a box or violin plot in cases where you want to show all observations along with some representation of the underlying distribution.
# Jittered strip plot of total_bill for each day
sns.stripplot(x="day", y="total_bill", data=tips, jitter=True)
# Coloured using a second categorical variable
sns.stripplot(x="day", y="total_bill", data=tips, jitter=True, hue='sex', palette='Set1')
# Same hue variable with dodge=True. `dodge` is the current name of the old
# `split` parameter, which current seaborn no longer accepts for stripplot().
sns.stripplot(x="day", y="total_bill", data=tips, jitter=True, hue='sex', palette='Set1', dodge=True)
seaborn.swarmplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, dodge=False, orient=None, color=None, palette=None, size=5, edgecolor='gray', linewidth=0, ax=None, **kwargs)
The swarmplot is similar to stripplot(), but the points are adjusted (only along the categorical axis) so that they don’t overlap. This gives a better representation of the distribution of values, although it does not scale as well to large numbers of observations (both in terms of the ability to show all the points and in terms of the computation needed to arrange them).
# Swarm plot of total_bill for each day
sns.swarmplot(data=tips, x="day", y="total_bill")
# With `sex` as hue and dodge=True
sns.swarmplot(data=tips, x="day", y="total_bill", hue="sex", dodge=True, palette="Set1")
# Violin plot of tip per day, with the observations drawn on top as a
# small black swarm
sns.violinplot(data=tips, x="tip", y="day", palette="rainbow")
sns.swarmplot(data=tips, x="tip", y="day", color="black", size=3)
Matrix plots allow you to plot data as color-encoded matrices and can also be used to indicate clusters within the data (later in the machine learning section we will learn how to formally cluster data).
sns.heatmap(data, vmin=None, vmax=None, cmap=None, center=None, robust=False, annot=None, fmt='.2g', annot_kws=None, linewidths=0, linecolor='white', cbar=True, cbar_kws=None, cbar_ax=None, square=False, xticklabels='auto', yticklabels='auto', mask=None, ax=None, **kwargs)
Plot rectangular data as a color-encoded matrix. This is an Axes-level function and will draw the heatmap into the currently-active Axes if none is provided to the ax argument.
# Matrix form for correlation data.
# Restrict to the numeric columns first: `tips` also holds categorical
# columns, and DataFrame.corr() raises on non-numeric data in pandas >= 2.0.
tips_corr = tips.select_dtypes(include="number").corr()
tips_corr
sns.heatmap(tips_corr)
# Diverging colour map with each cell annotated by its value
sns.heatmap(tips_corr, cmap='coolwarm', annot=True)
Or for the flights data:
# Reshape flights into a month x year matrix of passenger counts
pvflights = flights.pivot_table(values='passengers', index='month', columns='year')
pvflights
# pivot_table aggregates with the mean by default, so the cells are floats;
# the integer format code "d" raises ValueError on floats, hence ".0f".
sns.heatmap(pvflights, annot=True, fmt=".0f")
# Same matrix with a sequential colour map and white cell borders
sns.heatmap(pvflights, cmap="YlGnBu", linecolor='white', linewidths=1)
sns.clustermap(data, pivot_kws=None, method='average', metric='euclidean', z_score=None, standard_scale=None, figsize=(10, 10), cbar_kws=None, row_cluster=True, col_cluster=True, row_linkage=None, col_linkage=None, row_colors=None, col_colors=None, mask=None, dendrogram_ratio=0.2, colors_ratio=0.03, cbar_pos=(0.02, 0.8, 0.05, 0.18), tree_kws=None, **kwargs)
The clustermap uses hierarchical clustering to produce a clustered version of the heatmap.
# Clustered heatmap of the month x year passenger matrix
sns.clustermap(data=pvflights)
Notice now how the years and months are no longer in order; instead they are grouped by similarity in value (passenger count). That means we can begin to infer things from this plot, such as August and July being similar (makes sense, since they are both summer travel months).
# Extra options make the picture clearer, e.g. rescaling the data
# (standard_scale=1) and a diverging colour map
sns.clustermap(data=pvflights, standard_scale=1, cmap="coolwarm")
Grids are general types of plots that allow you to map plot types to rows and columns of a grid; this helps you create similar plots separated by features.
sns.PairGrid(data, hue=None, hue_order=None, palette=None, hue_kws=None, vars=None, x_vars=None, y_vars=None, corner=False, diag_sharey=True, height=2.5, aspect=1, layout_pad=0, despine=True, dropna=True, size=None)
Pairgrid is a subplot grid for plotting pairwise relationships in a dataset.
# Just the grid, with nothing mapped onto it yet
sns.PairGrid(data=iris)
# Then map a plotting function onto the grid
g = sns.PairGrid(data=iris, hue="species", palette="Set2")
g.map(plt.scatter)
# Per-hue markers passed through hue_kws, plus a legend
g = sns.PairGrid(
    data=iris,
    hue="species",
    palette="Set2",
    hue_kws={"marker": ["o", "s", "D"]},
)
g = g.map(sns.scatterplot, linewidths=1, edgecolor="w", s=40)
g = g.add_legend()
# Different plot types for the diagonal, upper and lower triangles
g = sns.PairGrid(data=iris, hue="species", palette="Set2")
g.map_diag(plt.hist)
g.map_upper(plt.scatter)
g.map_lower(sns.kdeplot)
sns.pairplot(data, hue=None, hue_order=None, palette=None, vars=None, x_vars=None, y_vars=None, kind='scatter', diag_kind='auto', markers=None, height=2.5, aspect=1, corner=False, dropna=True, plot_kws=None, diag_kws=None, grid_kws=None, size=None)
PairGrid is flexible, but to take a quick look at a dataset, it can be easier to use pairplot(). This function uses scatterplots and histograms by default, although a few other kinds will be added (currently, you can also plot regression plots on the off-diagonals and KDEs on the diagonal).
# Quick pairwise overview of iris, first plain and then coloured by species
sns.pairplot(data=iris)
sns.pairplot(data=iris, hue="species", palette="rainbow")
sns.FacetGrid(data, row=None, col=None, hue=None, col_wrap=None, sharex=True, sharey=True, height=3, aspect=1, palette=None, row_order=None, col_order=None, hue_order=None, hue_kws=None, dropna=True, legend_out=True, despine=True, margin_titles=False, xlim=None, ylim=None, subplot_kws=None, gridspec_kws=None, size=None)
Multi-plot grid for plotting conditional relationships.
# Just the grid: facets for every smoker (rows) x time (columns) combination
g = sns.FacetGrid(data=tips, row="smoker", col="time")
# Same grid, with a histogram of total_bill mapped onto each facet
g = sns.FacetGrid(data=tips, row="smoker", col="time")
g = g.map(plt.hist, "total_bill")
# Same grid coloured by sex
g = sns.FacetGrid(data=tips, row="smoker", col="time", hue="sex")
# Notice how the column names come after the plotting function in map()
g = g.map(plt.scatter, "total_bill", "tip").add_legend()
JointGrid is the general version for jointplot() type grids, for a quick example:
# Bare joint grid for tip against total_bill
g = sns.JointGrid(data=tips, x="total_bill", y="tip")
# Same grid, plotted with regplot and distplot.
# NOTE(review): distplot() is deprecated since seaborn 0.11 -- consider
# histplot() here.
g = sns.JointGrid(data=tips, x="total_bill", y="tip")
g = g.plot(sns.regplot, sns.distplot)
Grids are general types of plots that allow you to map plot types to rows and columns of a grid; this helps you create similar plots separated by features.
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the iris example dataset and preview its first rows
iris = sns.load_dataset("iris")
iris.head()
Pairgrid is a subplot grid for plotting pairwise relationships in a dataset.
# Just the grid, with nothing mapped onto it yet
sns.PairGrid(data=iris)
# Then map a plotting function onto the grid
g = sns.PairGrid(data=iris)
g.map(plt.scatter)
# Different plot types for the diagonal, upper and lower triangles
g = sns.PairGrid(data=iris)
g.map_diag(plt.hist)
g.map_upper(plt.scatter)
g.map_lower(sns.kdeplot)
pairplot is a simpler version of PairGrid (one you'll use quite often).
# Pairwise overview of iris, first plain and then coloured by species
sns.pairplot(data=iris)
sns.pairplot(data=iris, hue="species", palette="rainbow")
FacetGrid is the general way to create grids of plots based off of a feature:
# Load the tips example dataset and preview its first rows
tips = sns.load_dataset("tips")
tips.head()
# Just the grid: facets for every smoker (rows) x time (columns) combination
g = sns.FacetGrid(data=tips, row="smoker", col="time")
# Same grid, with a histogram of total_bill mapped onto each facet
g = sns.FacetGrid(data=tips, row="smoker", col="time")
g = g.map(plt.hist, "total_bill")
# Same grid coloured by sex
g = sns.FacetGrid(data=tips, row="smoker", col="time", hue="sex")
# Notice how the column names come after the plotting function in map()
g = g.map(plt.scatter, "total_bill", "tip").add_legend()
You can set particular styles:
# The same count plot under each of the built-in seaborn styles
sns.set_style("white")
sns.countplot(data=tips, x="sex")
sns.set_style("whitegrid")
sns.countplot(data=tips, x="sex")
sns.set_style("ticks")
sns.countplot(data=tips, x="sex", palette="deep")
sns.set_style("dark")
sns.countplot(data=tips, x="sex", palette="deep")
sns.set_style("darkgrid")
sns.countplot(data=tips, x="sex", palette="deep")
# despine() with its default arguments
sns.countplot(data=tips, x="sex")
sns.despine()
# despine() with left=True
sns.countplot(data=tips, x="sex")
sns.despine(left=True)
You can use matplotlib's plt.figure(figsize=(width,height)) to change the size of most seaborn plots.
You can control the size and aspect ratio of most seaborn grid plots by passing the height and aspect parameters. For example:
# Non-grid plot: size the matplotlib figure before drawing
plt.figure(figsize=(12, 3))
sns.countplot(data=tips, x="sex")
# Grid-type plot: size is controlled through height and aspect
sns.lmplot(data=tips, x="total_bill", y="tip", height=2, aspect=4)
The set_context() allows you to override default parameters:
# The same count plot under different plotting contexts and font scales
sns.set_context("notebook")
sns.countplot(data=tips, x="sex", palette="coolwarm")
sns.set_context("notebook", font_scale=2)
sns.countplot(data=tips, x="sex", palette="coolwarm")
sns.set_context("poster", font_scale=2)
sns.countplot(data=tips, x="sex", palette="coolwarm")
