%run -i "standard_header.py"
%matplotlib inline

import matplotlib as mpl

# Raise the default figure resolution to 300 dpi.
mpl.rcParams["figure.dpi"] = 300

# Select the rows whose Kingdom is "Animals", then drop entries with
# missing values.
is_animal = euk["Kingdom"] == "Animals"
animals = euk[is_animal].dropna()

# aspect.py

# Only plot genomes smaller than 5000 Mb.
below_5000_mb = euk["Size (Mb)"] < 5000
small_genomes = euk[below_5000_mb]

# aspect=2 asks seaborn for a facet twice as wide as it is tall.
sns.relplot(
    x="Size (Mb)",
    y="Number of genes",
    data=small_genomes,
    aspect=2,
)

plt.xlabel("size (Megabases)")
plt.title("Genome size vs number of genes for genomes < 5000 Mb")

# Rename three columns so that plots built from this frame pick up more
# descriptive axis and legend labels.
clearer_names = {
    "Number of genes": "Number of predicted genes",
    "Number of proteins": "Number of predicted proteins",
    "GC%": "GC percentage",
}
new_animals = animals.rename(columns=clearer_names)
new_animals.head()

# Genes vs proteins for the renamed animal data: marker size encodes
# genome size (scaled into the 2-150 range) and colour encodes GC percentage.
encodings = {
    "x": "Number of predicted genes",
    "y": "Number of predicted proteins",
    "hue": "GC percentage",
    "size": "Size (Mb)",
    "sizes": (2, 150),
}
sns.relplot(data=new_animals, **encodings)
plt.title("Number of genes vs number of proteins\n for animal genomes")

# Summary statistics of the renamed data.
new_animals.describe()

# Column names after renaming.
new_animals.columns

# set_labels.py

# Plot using the original column names, then override the axis labels and
# title on the current axes by hand.
sns.relplot(
    data=animals,
    x="Number of genes",
    y="Number of proteins",
    height=6,
)
plt.xlabel("Number of predicted genes")
plt.ylabel("Number of predicted proteins")
plt.title("Number of genes vs number of proteins\n for animal genomes")

# set_legend_labels.py

# Genes vs proteins for animal genomes, with marker size encoding genome
# size and colour encoding publication year.
chart = sns.relplot(
    data=animals,
    x="Number of genes",
    y="Number of proteins",
    size="Size (Mb)",
    sizes=(2, 150),
    hue="Publication year",
)
plt.title("Number of genes vs number of proteins\n for animal genomes")
plt.xlabel("Number of predicted genes")
plt.ylabel("Number of predicted proteins")

# When both hue and size are mapped, seaborn puts each variable's name into
# the legend as an ordinary text entry acting as a sub-heading. Replace those
# headings by matching their text rather than by position: the position of
# the size heading depends on how many hue levels seaborn chooses to show,
# so a hard-coded index can silently relabel the wrong entry or raise
# IndexError.
legend_headings = {
    "Publication year": "Year\nsequenced",
    "Size (Mb)": "Genome\nsize (Mb)",
}
# NOTE(review): `_legend` is a private FacetGrid attribute; newer seaborn
# releases expose a public `legend` property — confirm the pinned version
# before switching.
for entry in chart._legend.get_texts():
    replacement = legend_headings.get(entry.get_text())
    if replacement is not None:
        entry.set_text(replacement)

# set_styles.py

# Matplotlib rc overrides. They are applied only while the `with` block below
# is active, so later plots are not affected.
options = {
    "grid.linewidth": 5.0,  # make grid lines thick
    "axes.titlesize": 18.0,  # make title large
    "xtick.major.width": 3,  # make x-axis tick marks thick
    "axes.linewidth": 3,  # make axis lines thick
    "axes.facecolor": "lightpink",  # make background pink
    "axes.edgecolor": "green",  # make axis lines green
    "xtick.bottom": True,  # show ticks on the x axis
    "grid.linestyle": "--",  # make grid lines dashed
}

with plt.rc_context(options):
    sns.relplot(
        data=animals, x="Number of genes", y="Number of proteins", height=6
    )
    plt.title("Number of genes vs number of proteins\n for animal genomes")
    plt.xlabel("Number of predicted genes")
    plt.ylabel("Number of predicted proteins")

# marker_size.py

# Only plot genomes smaller than 5000 Mb.
below_5000_mb = euk["Size (Mb)"] < 5000
small_genomes = euk[below_5000_mb]

sns.relplot(
    x="Size (Mb)",
    y="Number of genes",
    data=small_genomes,
    s=10,  # small markers
)

plt.xlabel("size (Megabases)")
plt.title("Genome size vs number of genes\n for genomes < 5000 Mb")

# Same scatter plot as above, attempting to colour the markers orange via `c`.
# NOTE(review): `c` is not a named relplot parameter; it is presumably
# forwarded to the underlying matplotlib scatter call, and whether that works
# appears to depend on the seaborn version. The following cell draws the same
# plot with `color=` instead — confirm whether this cell is an intentional
# demonstration of the pitfall before changing or removing it.
sns.relplot(
    data=euk[euk["Size (Mb)"] < 5000],
    x="Size (Mb)",
    y="Number of genes",
    c="orange",  # attempt to specify orange color (see note above)
)

# passing_color.py

# Only plot genomes smaller than 5000 Mb.
below_5000_mb = euk["Size (Mb)"] < 5000
small_genomes = euk[below_5000_mb]

sns.relplot(
    x="Size (Mb)",
    y="Number of genes",
    data=small_genomes,
    color="orange",  # draw every marker in orange
)

plt.xlabel("size (Megabases)")
plt.title("Genome size vs number of genes\n for genomes < 5000 Mb")

# Distribution of GC% over all genomes, ignoring missing values.
# NOTE(review): distplot is deprecated in newer seaborn releases (histplot /
# displot replace it) — confirm the pinned seaborn version before upgrading.
gc_values = euk["GC%"].dropna()
sns.distplot(gc_values)
plt.title("Distribution of GC% for all genomes")

# cumulative_hist.py

# GC% values with missing entries removed; reused by both plots below.
gc_values = euk["GC%"].dropna()

# Cumulative histogram only: kde=False switches off the density curve and
# the "cumulative" flag is handed to the underlying histogram call.
# A fresh hist_kws dict is built for each call rather than shared.
sns.distplot(gc_values, kde=False, hist_kws={"cumulative": True})
plt.title("Cumulative histogram of GC% for all genomes")

# Repeat the plot, then write the current figure to hist.svg.
sns.distplot(gc_values, kde=False, hist_kws={"cumulative": True})
plt.title("Cumulative histogram of GC% for all genomes")
plt.savefig("hist.svg")