#
# standard code to import packages and read data files
#

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Explicit dtypes for the eukaryote genome table: text columns use the pandas
# "string" dtype and the two count columns use the nullable "Int64" dtype.
my_types = {
    "Species": "string",
    "Kingdom": "string",
    "Class": "string",
    "Assembly status": "string",
    "Number of genes": "Int64",
    "Number of proteins": "Int64",
}

# Tab-separated genome table; a bare "-" in any field is read as missing.
euk = pd.read_csv("eukaryotes.tsv", sep="\t", dtype=my_types, na_values=["-"])

# Derived table: genomes smaller than 4,000 Mb, with every row that has a
# missing value in any column removed, and both count columns cast to float.
euk_float = (
    euk.loc[euk["Size (Mb)"] < 4_000]
    .dropna()
    .astype({"Number of genes": float, "Number of proteins": float})
)

# Remaining data files, read with the default CSV parsing options.
weather = pd.read_csv("weather.csv")
con = pd.read_csv("contigs.csv")
london_rain = pd.read_csv("london_rainfall.csv")
names = pd.read_csv("common_names.csv")



#############################################################
#                                                           #
#           Example code is below this line                 #
#                                                           #
#############################################################


# Draw a grid of GC% vs. gene-count panels for two fungal classes: one row
# per class, one column per publication year.

# Publication-year window (inclusive at both ends). The row filter and the
# figure title are both built from these two values so they cannot disagree.
first_year, last_year = 2010, 2013

# first lay out the grid
g = sns.FacetGrid(
    # keep only the two fungal classes and the chosen years, then drop every
    # row that has a missing value in any column
    data=euk[
        (euk["Class"].isin(["Ascomycetes", "Basidiomycetes"]))
        & (euk["Publication year"].between(first_year, last_year))
    ].dropna(),
    row="Class",
    col="Publication year",
    aspect=1,
    height=3,
)

# NOTE(review): plot_gc_genes_contourmap is not defined in the visible part of
# this file. It must be defined or imported before this line, otherwise the
# call below raises NameError. map_dataframe calls it once per facet.
g.map_dataframe(plot_gc_genes_contourmap)
g.set_axis_labels("GC%", "Number of genes")

# we need titles with a newline otherwise they are too long
g.set_titles("{row_name}\n{col_name}")

# y is in figure coordinates, so a value above 1 puts the three-line title
# above the top edge of the grid (presumably to keep it clear of the facet
# titles).
g.fig.suptitle(
    "Density plot of GC% \n"
    "versus number of predicted genes plus 5 largest genomes\n"
    f"for fungal classes published between {first_year} and {last_year}",
    y=1.15,
)