# Execute the shared setup script; -i runs it inside the current interactive
# namespace so that whatever it defines stays available to the cells below.
# NOTE(review): `euk`, `sns` and `plt` are used throughout this file but are
# never defined here -- presumably standard_header.py provides them; confirm.
%run -i "standard_header.py"
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import matplotlib as mpl

# Default resolution (dots per inch) for every figure created from here on.
mpl.rcParams["figure.dpi"] = 300

# Keep only the rows whose Kingdom is "Animals", then discard every row that
# still contains a missing value.
animals = euk.loc[euk["Kingdom"] == "Animals"].dropna()

# Genes against proteins; marker size is driven by "Size (Mb)" and marker
# colour by "Publication year".
sns.relplot(
    x="Number of genes",
    y="Number of proteins",
    hue="Publication year",
    size="Size (Mb)",
    sizes=(2, 150),
    data=animals,
)
plt.title(
    "Number of genes vs number of proteins\n"
    " for animal genomes"
)

# category_hue.py
# Rebuild the animal-only table (rows with any missing value removed).
animals = euk.loc[euk["Kingdom"] == "Animals"].dropna()

# Same genes-vs-proteins scatter, but the colour now encodes the categorical
# "Class" column.
sns.relplot(
    x="Number of genes",
    y="Number of proteins",
    hue="Class",
    size="Size (Mb)",
    sizes=(2, 150),
    data=animals,
)
plt.title(
    "Number of genes vs number of proteins\n"
    " for animal genomes in different classes"
)

# category_axis.py

# Put the categorical "Class" column on the y axis and plot gene counts along
# x; marker size still follows "Size (Mb)". Uses the `animals` table built
# earlier in this file.
sns.relplot(
    x="Number of genes",
    y="Class",
    size="Size (Mb)",
    sizes=(2, 150),
    data=animals,
)
plt.title(
    "Number of genes \n"
    " for animal genomes in different classes"
)

# category_size.py

# Map the categorical "Class" column onto marker size instead of colour.
# Uses the `animals` table built earlier in this file.
sns.relplot(
    x="Number of genes",
    y="Number of proteins",
    size="Class",
    sizes=(2, 150),
    data=animals,
)
plt.title(
    "Number of genes vs number of proteins\n"
    " for animal genomes in different classes"
)

# Tally how many animal genomes fall into each class.
animals["Class"].value_counts()

# category_style.py

# Restrict the full table to the two classes being compared.
my_genomes = euk.loc[euk["Class"].isin(["Birds", "Fishes"])]

# Distinguish the two classes by marker style.
sns.relplot(
    x="Number of genes",
    y="Number of proteins",
    style="Class",
    data=my_genomes,
)
plt.title(
    "Number of genes vs number of proteins\n"
    " for bird and fish genomes"
)

# col_scatter_plot.py

# One facet column per value of "Class", drawn from the `my_genomes` subset
# built earlier in this file.
g = sns.relplot(
    x="Number of genes",
    y="Number of proteins",
    col="Class",
    data=my_genomes,
)

# Figure-level title; y=1.05 places it slightly above the top of the figure.
g.fig.suptitle(
    "Number of genes vs. number of proteins",
    y=1.05,
)

# grid_plot.py

# Plants and animals only, assembled to chromosome or scaffold level, with a
# genome size below 5000 Mb.
data = euk[
    euk["Kingdom"].isin(["Plants", "Animals"])
    & euk["Assembly status"].isin(["Chromosome", "Scaffold"])
    & (euk["Size (Mb)"] < 5000)
]

# Facet grid: kingdoms across the columns, assembly status down the rows.
g = sns.relplot(
    x="Size (Mb)",
    y="Number of proteins",
    row="Assembly status",
    col="Kingdom",
    height=3,
    aspect=1.5,
    s=10,
    data=data,
)

# Figure-level title; y=1.05 places it slightly above the top of the figure.
g.fig.suptitle(
    "Size vs. number of proteins for plant and animal genomes",
    y=1.05,
)

# wrapped_columns.py

# One facet per publication year, wrapped so that each row holds four facets;
# colour encodes the assembly status. Uses the filtered `data` table built
# earlier in this file.
g = sns.relplot(
    x="Size (Mb)",
    y="Number of proteins",
    hue="Assembly status",
    col="Publication year",
    col_wrap=4,
    height=2,
    s=20,
    data=data,
)

# Two-line figure-level title placed slightly above the top of the figure.
g.fig.suptitle(
    "Size vs. number of proteins for plant and animal genomes\n"
    "in different years",
    y=1.05,
)

# grid_regression_plot.py

# Scatter plus fitted regression line per assembly status (colour), with one
# facet column per kingdom. Uses the filtered `data` table built earlier in
# this file.
g = sns.lmplot(
    x="GC%",
    y="Number of proteins",
    hue="Assembly status",
    col="Kingdom",
    data=data,
)

# Two-line figure-level title placed slightly above the top of the figure.
g.fig.suptitle(
    "Linear regression of GC% vs. number of proteins\n"
    "for plant and animal genomes",
    y=1.05,
)