%run -i "standard_header.py"
%matplotlib inline

import matplotlib as mpl

# render every figure at 300 dpi
mpl.rcParams.update({"figure.dpi": 300})

# scatter of gene count against protein count, one panel per class,
# coloured by assembly status; rows with any missing value are dropped
g = sns.relplot(
    x="Number of genes",
    y="Number of proteins",
    hue="Assembly status",
    col="Class",
    data=euk.loc[euk["Class"].isin(["Birds", "Fishes"])].dropna(),
)
g.fig.suptitle(
    "Number of genes vs. Number of proteins for bird and fish genomes\n"
    "in different assembly status",
    y=1.1,
)

# box plot of gene count per assembly status, one panel per class;
# rows with any missing value are dropped first
g = sns.catplot(
    kind="box",
    x="Assembly status",
    y="Number of genes",
    col="Class",
    data=euk.loc[euk["Class"].isin(["Birds", "Fishes"])].dropna(),
)
g.fig.suptitle(
    "Distribution of number of genes in bird and fish genomes\n"
    "in different assembly status",
    y=1.1,
)

# distplot draws on a single axes and has no col/col_wrap parameters, so
# passing them raises a TypeError. To get one GC% histogram per class we
# lay out a FacetGrid (which does understand col/col_wrap) and map
# distplot onto each of its panels.
sns.FacetGrid(
    data=euk[euk["Class"].isin(["Roundworms", "Flatworms", "Insects"])],
    col="Class",
    col_wrap=3,
).map(sns.distplot, "GC%")

# make_grid.py

# lay out an empty grid only: one panel per class, wrapping after 3 columns
sns.FacetGrid(
    euk.loc[euk["Class"].isin(["Roundworms", "Flatworms", "Insects"])],
    col="Class",
    col_wrap=3,
    height=4,
    aspect=1,
)

# grid_histogram.py

# step 1: build the grid, one square panel per animal class
g = sns.FacetGrid(
    euk.loc[euk["Class"].isin(["Roundworms", "Flatworms", "Insects"])],
    col="Class",
    height=4,
    aspect=1,
)

# step 2: draw on every panel
#   - sns.distplot is the function called once per panel
#   - "GC%" is the column handed to it
#   - color is forwarded to distplot unchanged
g.map(sns.distplot, "GC%", color="purple")
g.fig.suptitle("Distribution of GC% for each animal class", y=1.05)

# grid_kdeplot.py

# step 1: build the grid, one square panel per animal class
g = sns.FacetGrid(
    euk.loc[euk["Class"].isin(["Roundworms", "Flatworms", "Insects"])],
    col="Class",
    height=4,
    aspect=1,
)

# step 2: draw a filled two-variable density plot on every panel;
# the two column names are passed to kdeplot in this order
g.map(
    sns.kdeplot,
    "GC%",
    "Number of genes",
    shade=True,
    shade_lowest=False,
    cmap="OrRd",
)

g.fig.suptitle(
    "Density plot of GC% versus number of predicted genes for different animal classes",
    y=1.05,
)

# plot_single.py


def plot_gc_genes_contourmap(data, color):
    """Draw a filled density plot of GC% against number of genes.

    Rows of ``data`` containing a missing value in any column are
    discarded before plotting. ``color`` is accepted so the function can
    be used with ``FacetGrid.map_dataframe`` but is not used.
    """
    complete_rows = data.dropna()
    sns.kdeplot(
        complete_rows["GC%"],
        complete_rows["Number of genes"],
        shade=True,
        shade_lowest=False,
        cmap="OrRd",
    )

# try the function once on the whole dataset
plot_gc_genes_contourmap(data=euk, color=None)

# grid_function.py

# step 1: build the grid, one square panel per animal class
g = sns.FacetGrid(
    euk.loc[euk["Class"].isin(["Roundworms", "Flatworms", "Insects"])],
    col="Class",
    height=4,
    aspect=1,
)

# step 2: call our own function once per panel
g.map_dataframe(plot_gc_genes_contourmap)
g.set_axis_labels("GC%", "Number of genes")

g.fig.suptitle(
    "Density plot of GC% versus number of predicted genes for different animal classes",
    y=1.05,
)

# custom_regression.py


def plot_gc_genes_contourmap(data, color):
    """Draw a GC% vs. number-of-genes density plot with a regression line.

    The density plot uses only rows of ``data`` with no missing value in
    any column; the regression is given ``data`` as passed in. ``color``
    is accepted so the function can be used with
    ``FacetGrid.map_dataframe`` but is not used.
    """
    # layer 1: the filled contour plot
    complete_rows = data.dropna()
    sns.kdeplot(
        complete_rows["GC%"],
        complete_rows["Number of genes"],
        shade=True,
        shade_lowest=False,
        cmap="OrRd",
    )

    # layer 2: the fitted line on its own -
    # scatter points and confidence interval are both switched off
    sns.regplot(
        x="GC%",
        y="Number of genes",
        data=data,
        color="lightblue",
        scatter=None,
        ci=None,
    )


# try the function once on the whole dataset
plot_gc_genes_contourmap(data=euk, color=None)

# grid_custom_regression.py

# step 1: build the grid, one square panel per animal class
g = sns.FacetGrid(
    euk.loc[euk["Class"].isin(["Roundworms", "Flatworms", "Insects"])],
    col="Class",
    height=4,
    aspect=1,
)

# step 2: call our own function once per panel
g.map_dataframe(plot_gc_genes_contourmap)
g.set_axis_labels("GC%", "Number of genes")

g.fig.suptitle(
    "Density plot and linear regression of GC% versus number of predicted genes\n"
    " for different animal classes",
    y=1.1,
)
# presumably here so the notebook does not echo suptitle's return value
None

# custom_overlay.py


def plot_gc_genes_contourmap(data, color):
    """Draw a GC% vs. number-of-genes density plot and mark the largest genomes.

    Only rows of ``data`` with no missing value in any column are used.
    The five rows with the greatest "Size (Mb)" are overlaid as dark
    green points. ``color`` is accepted so the function can be used with
    ``FacetGrid.map_dataframe`` but is not used.
    """
    complete_rows = data.dropna()

    # layer 1: the filled contour plot
    sns.kdeplot(
        complete_rows["GC%"],
        complete_rows["Number of genes"],
        shade=True,
        shade_lowest=False,
        cmap="OrRd",
    )

    # layer 2: after an ascending sort the last five rows are the biggest
    biggest = complete_rows.sort_values("Size (Mb)").tail(5)
    sns.scatterplot(
        x="GC%",
        y="Number of genes",
        data=biggest,
        color="darkgreen",
    )


# try the function once on the whole dataset
plot_gc_genes_contourmap(data=euk, color=None)

# grid_custom_overlay.py

# step 1: build the grid, one square panel per animal class
g = sns.FacetGrid(
    euk.loc[euk["Class"].isin(["Roundworms", "Flatworms", "Insects"])],
    col="Class",
    height=4,
    aspect=1,
)

# step 2: call our own function once per panel
g.map_dataframe(plot_gc_genes_contourmap)
g.set_axis_labels("GC%", "Number of genes")

g.fig.suptitle(
    "Density plot of GC% versus number of predicted genes plus 5 largest genomes\n"
    " for different animal classes",
    y=1.1,
)
# presumably here so the notebook does not echo suptitle's return value
None

# large_grid_overlay.py

# first lay out the grid: one row per fungal class, one column per
# publication year; only 2010-2013 (inclusive) are kept and rows with any
# missing value are dropped
g = sns.FacetGrid(
    data=euk[
        (euk["Class"].isin(["Ascomycetes", "Basidiomycetes"]))
        & (euk["Publication year"].between(2010, 2013))
    ].dropna(),
    row="Class",
    col="Publication year",
    aspect=1,
    height=3,
)

# then call our own function once per panel
g.map_dataframe(plot_gc_genes_contourmap)
g.set_axis_labels("GC%", "Number of genes")

# we need titles with a newline otherwise they are too long
g.set_titles("{row_name}\n{col_name}")

# the year range in the title must match the between(2010, 2013) filter
# above (it previously claimed 2015)
g.fig.suptitle(
    "Density plot of GC% \n"
    "versus number of predicted genes plus 5 largest genomes\n"
    "for fungal classes published between 2010 and 2013",
    y=1.15,
)

# one wide figure shared by all the curves
plt.figure(figsize=(8, 4))

# draw one labelled density curve (no histogram bars) per kingdom value
for kingdom in euk["Kingdom"].unique():
    one_kingdom = euk.loc[euk["Kingdom"] == kingdom]
    sns.distplot(
        one_kingdom["GC%"].dropna(),
        label=kingdom,
        hist=False,
    )

plt.title(
    "Distribution of GC percentage for genomes\n"
    "belonging to different kingdoms"
)

# draw_one_distribution.py


def draw_distribution(data, color, label):
    """Draw the distribution of the non-missing GC% values in ``data``.

    The histogram bars are switched off; ``color`` and ``label`` are
    forwarded to ``sns.distplot`` unchanged.
    """
    gc_values = data["GC%"].dropna()
    sns.distplot(gc_values, color=color, label=label, hist=False)


# try the function once on the whole dataset
draw_distribution(data=euk, color="red", label="all genomes")

# grid_distribution.py

# a grid with no row/col variable: every kingdom is drawn on the same
# panel, distinguished by hue
g = sns.FacetGrid(
    euk,
    hue="Kingdom",
    height=4,
    aspect=2,
)

g.map_dataframe(draw_distribution)
g.set_axis_labels("GC%")
g.add_legend()
g.fig.suptitle(
    "Distribution of GC percentage for genomes\n"
    "belonging to different kingdoms",
    y=1.1,
)

# complex_grid_distribution.py

# fungal genomes with a publication year from 2010 to 2017 inclusive
is_fungus = euk["Kingdom"] == "Fungi"
in_year_range = euk["Publication year"].between(2010, 2017)
data = euk[is_fungus & in_year_range]

# one panel per publication year (wrapping after 4), one curve per class
g = sns.FacetGrid(
    data,
    col="Publication year",
    col_wrap=4,
    hue="Class",
    palette="Set2",
    height=3,
    aspect=1,
)

g.map_dataframe(draw_distribution)
g.set_axis_labels("GC%")
g.add_legend()
g.fig.suptitle(
    "Distribution of GC percentage for fungal genomes in different classes\n"
    "for years between 2010 and 2017",
    y=1.1,
)

# set_up_weather.py

weather = pd.read_csv("all_weather.csv")

from pandas.api.types import CategoricalDtype

# calendar order, used as the category order for the Month column
months = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]

# make Month an ordered categorical with the months in calendar order
ordered_months = CategoricalDtype(categories=months, ordered=True)
weather["Month"] = weather["Month"].astype(ordered_months)
weather.head()

# mean temperature per city (rows) and year (columns)
(weather.groupby(["City", "Year"])["Temperature (°C)"].mean().unstack(level=1))

# plot_single_heatmap.py


def plot_temperature(data, color):
    """Draw a heatmap of mean temperature with cities as rows and years as columns.

    Rows are sorted by city in descending order. ``color`` is accepted so
    the function can be used with ``FacetGrid.map_dataframe`` but is not
    used.
    """
    mean_by_city_year = data.groupby(["City", "Year"])["Temperature (°C)"].mean()
    city_by_year = mean_by_city_year.unstack(1).sort_index(ascending=False)
    sns.heatmap(city_by_year, cmap="hot")


# try the function once on the whole dataset
plot_temperature(data=weather, color=None)

# grid_heatmap.py

# one heatmap per month, four to a row
g = sns.FacetGrid(
    data=weather,
    col="Month",
    col_wrap=4,
    height=2,
    aspect=2,
)

g.map_dataframe(plot_temperature)

g.fig.suptitle("Mean monthly temperature for three cities by year", y=1.05)

# grid_heatmap_scale.py


def plot_temperature(data, color):
    """Draw a heatmap of mean temperature with cities as rows and years as columns.

    The colour scale is fixed to the range -10 to 30, so every heatmap
    drawn by this function uses the same scale. Rows are sorted by city
    in descending order. ``color`` is accepted so the function can be
    used with ``FacetGrid.map_dataframe`` but is not used.
    """
    mean_by_city_year = data.groupby(["City", "Year"])["Temperature (°C)"].mean()
    city_by_year = mean_by_city_year.unstack(1).sort_index(ascending=False)
    sns.heatmap(city_by_year, cmap="hot", vmin=-10, vmax=30)


# one heatmap per month, four to a row
g = sns.FacetGrid(
    data=weather,
    col="Month",
    col_wrap=4,
    height=1.5,
    aspect=2,
)

g.map_dataframe(plot_temperature)

g.fig.suptitle("Mean monthly temperature for three cities by year", y=1.05)

# plot_with_args.py


def plot_temperature(data, color, vmin, vmax):
    """Draw a heatmap of mean temperature with cities as rows and years as columns.

    ``vmin`` and ``vmax`` are forwarded to ``sns.heatmap`` as the limits
    of the colour scale. Rows are sorted by city in descending order.
    ``color`` is accepted so the function can be used with
    ``FacetGrid.map_dataframe`` but is not used.
    """
    mean_by_city_year = data.groupby(["City", "Year"])["Temperature (°C)"].mean()
    city_by_year = mean_by_city_year.unstack(1).sort_index(ascending=False)
    sns.heatmap(city_by_year, cmap="hot", vmin=vmin, vmax=vmax)

# grid_with_args.py

# one heatmap per month, four to a row
g = sns.FacetGrid(
    data=weather,
    col="Month",
    col_wrap=4,
    height=1.5,
    aspect=2,
)

# use the full range of the whole dataset as the colour scale of every panel
all_temperatures = weather["Temperature (°C)"]
g.map_dataframe(
    plot_temperature,
    vmin=all_temperatures.min(),
    vmax=all_temperatures.max(),
)

g.fig.suptitle("Mean monthly temperature for three cities by year", y=1.05)

# plot_with_cbar.py


def plot_temperature(data, color, vmin, vmax, cbar_axis):
    """Draw a heatmap of mean temperature with cities as rows and years as columns.

    ``vmin`` and ``vmax`` are forwarded to ``sns.heatmap`` as the limits
    of the colour scale, and ``cbar_axis`` is forwarded as the axes that
    receives the colour bar. Rows are sorted by city in descending order.
    ``color`` is accepted so the function can be used with
    ``FacetGrid.map_dataframe`` but is not used.
    """
    mean_by_city_year = data.groupby(["City", "Year"])["Temperature (°C)"].mean()
    city_by_year = mean_by_city_year.unstack(1).sort_index(ascending=False)
    sns.heatmap(
        city_by_year,
        cmap="hot",
        vmin=vmin,
        vmax=vmax,
        cbar_ax=cbar_axis,
    )

# grid_with_cbar.py

# one heatmap per month, four to a row
g = sns.FacetGrid(weather, col="Month", col_wrap=4, aspect=2, height=1.5)

# a single extra axes to hold the colour bar shared by every panel;
# all four numbers are fractions of the figure size
cbar_ax = g.fig.add_axes(
    [
        1.05,  # distance from the left of the figure
        0.2,  # distance from the bottom of the figure
        0.02,  # width of the axis
        0.6,  # height of the axis
    ]
)

# use the full range of the whole dataset as the colour scale of every panel
g.map_dataframe(
    plot_temperature,
    vmin=weather["Temperature (°C)"].min(),
    vmax=weather["Temperature (°C)"].max(),
    cbar_axis=cbar_ax,
)

# only one figure title can be shown: an earlier suptitle call here was
# immediately overwritten by this one, so it has been removed
g.fig.suptitle("Mean monthly temperature for three cities by year", y=1.05)