Copied!







###########################
#         IMPORTS         #
###########################

import os
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

###########################
#         HELPERS         #
###########################

# Simple header function for clean console output
def header(title):
    """Print ``title`` framed above and below by a rule of ``=`` characters.

    The rule is as wide as ``len(title)``, and a blank line is emitted first
    so the banner stands apart from any preceding console output.
    """
    rule = "=" * len(title)
    print(f"\n{rule}", title, rule, sep="\n")

#############################################
#     LOAD BINARY LANDSCAPES DEPENDENCY     #
#############################################

# Include your local path to the library here
base_path = os.path.expanduser("~/FunEcoLab_IBFG Dropbox/")
sys.path.insert(1, base_path)

# Import the main package
import epistasia as ep

###########################
#         IMPORTS         #
###########################

import os
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

###########################
#         HELPERS         #
###########################

# Simple header function for clean console output
def header(title):
    """Print ``title`` framed above and below by a rule of ``=`` characters.

    The rule is as wide as ``len(title)``, and a blank line is emitted first
    so the banner stands apart from any preceding console output.
    """
    rule = "=" * len(title)
    print(f"\n{rule}", title, rule, sep="\n")

#############################################
#     LOAD BINARY LANDSCAPES DEPENDENCY     #
#############################################

# Include your local path to the library here
base_path = os.path.expanduser("~/FunEcoLab_IBFG Dropbox/")
sys.path.insert(1, base_path)

# Import the main package
import epistasia as ep





Copied!







###########################
#         IMPORTS         #
###########################

import os
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

###########################
#         HELPERS         #
###########################

# Simple header function for clean console output
def header(title):
    """Print ``title`` framed above and below by a rule of ``=`` characters.

    The rule is as wide as ``len(title)``, and a blank line is emitted first
    so the banner stands apart from any preceding console output.
    """
    rule = "=" * len(title)
    print(f"\n{rule}", title, rule, sep="\n")

#############################################
#     LOAD BINARY LANDSCAPES DEPENDENCY     #
#############################################

# Include your local path to the library here
base_path = os.path.expanduser("~/FunEcoLab_IBFG Dropbox/")
sys.path.insert(1, base_path)

# Import the main package
import epistasia as ep

###########################
#         IMPORTS         #
###########################

import os
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

###########################
#         HELPERS         #
###########################

# Simple header function for clean console output
def header(title):
    """Print ``title`` framed above and below by a rule of ``=`` characters.

    The rule is as wide as ``len(title)``, and a blank line is emitted first
    so the banner stands apart from any preceding console output.
    """
    rule = "=" * len(title)
    print(f"\n{rule}", title, rule, sep="\n")

#############################################
#     LOAD BINARY LANDSCAPES DEPENDENCY     #
#############################################

# Include your local path to the library here
base_path = os.path.expanduser("~/FunEcoLab_IBFG Dropbox/")
sys.path.insert(1, base_path)

# Import the main package
import epistasia as ep





Copied!







# Folder holding the example datasets (adjust to your local setup)
path = os.path.expanduser("~/FunEcoLab_IBFG Dropbox/Noise/Datasets/")

# Load the landscape stored in the CSV file
L = ep.landscape_from_file(
    os.path.join(path, "Complete_landscape_Sabela.csv"),
)

# to_dataframe is a method: call it so the resulting table is displayed
# rather than the repr of the bound method itself
display(L.to_dataframe())

# Folder holding the example datasets (adjust to your local setup)
path = os.path.expanduser("~/FunEcoLab_IBFG Dropbox/Noise/Datasets/")

# Load the landscape stored in the CSV file
L = ep.landscape_from_file(
    os.path.join(path, "Complete_landscape_Sabela.csv"),
)

# to_dataframe is a method: call it so the resulting table is displayed
# rather than the repr of the bound method itself
display(L.to_dataframe())

/home/jose/FunEcoLab_IBFG Dropbox/epistasia/io.py:51: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support sep=None with delim_whitespace=False; you can avoid this warning by specifying engine='python'.
  return pd.read_csv(path, sep=sep, encoding=enc)

<bound method Landscape.to_dataframe of Landscape(N=10, R=5, shape=(1024, 5))
                  C10  C9  C8  C7  C6  C5  C4  C1  C12  C14     rep_1     rep_2     rep_3     rep_4     rep_5
state      Order                                                                                             
0000000000 0        0   0   0   0   0   0   0   0    0    0  0.000000  0.000000  0.000000  0.000000  0.000000
0000000001 1        0   0   0   0   0   0   0   0    0    1  0.048467  0.038793  0.133346  0.046642  0.144859
0000000010 1        0   0   0   0   0   0   0   0    1    0  0.215349  0.030650  0.212454  0.166285  0.199879
0000000011 2        0   0   0   0   0   0   0   0    1    1  0.162641  0.163804  0.130344  0.141865  0.180170
0000000100 1        0   0   0   0   0   0   0   1    0    0  0.160771  0.154306  0.164416  0.111621  0.099257
0000000101 2        0   0   0   0   0   0   0   1    0    1  0.170876  0.139765  0.253409  0.152870  0.192046
0000000110 2        0   0   0   0   0   0   0   1    1    0  0.171582  0.195181  0.171896  0.115580  0.228525
0000000111 3        0   0   0   0   0   0   0   1    1    1  0.153575  0.170719  0.157193  0.150281  0.170767
0000001000 1        0   0   0   0   0   0   1   0    0    0  0.065712  0.055235  0.120205  0.060541  0.043004
0000001001 2        0   0   0   0   0   0   1   0    0    1  0.069255  0.081904  0.170409  0.062216  0.146146
...>





Copied!







# Folder holding the example datasets (adjust to your local setup)
path = os.path.expanduser("~/FunEcoLab_IBFG Dropbox/Noise/Datasets/")

# Load the landscape stored in the CSV file
L = ep.landscape_from_file(
    os.path.join(path, "Complete_landscape_Sabela.csv"),
)

# to_dataframe is a method: call it so the resulting table is displayed
# rather than the repr of the bound method itself
display(L.to_dataframe())

# Folder holding the example datasets (adjust to your local setup)
path = os.path.expanduser("~/FunEcoLab_IBFG Dropbox/Noise/Datasets/")

# Load the landscape stored in the CSV file
L = ep.landscape_from_file(
    os.path.join(path, "Complete_landscape_Sabela.csv"),
)

# to_dataframe is a method: call it so the resulting table is displayed
# rather than the repr of the bound method itself
display(L.to_dataframe())

/home/jose/FunEcoLab_IBFG Dropbox/epistasia/io.py:51: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support sep=None with delim_whitespace=False; you can avoid this warning by specifying engine='python'.
  return pd.read_csv(path, sep=sep, encoding=enc)

<bound method Landscape.to_dataframe of Landscape(N=10, R=5, shape=(1024, 5))
                  C10  C9  C8  C7  C6  C5  C4  C1  C12  C14     rep_1     rep_2     rep_3     rep_4     rep_5
state      Order                                                                                             
0000000000 0        0   0   0   0   0   0   0   0    0    0  0.000000  0.000000  0.000000  0.000000  0.000000
0000000001 1        0   0   0   0   0   0   0   0    0    1  0.048467  0.038793  0.133346  0.046642  0.144859
0000000010 1        0   0   0   0   0   0   0   0    1    0  0.215349  0.030650  0.212454  0.166285  0.199879
0000000011 2        0   0   0   0   0   0   0   0    1    1  0.162641  0.163804  0.130344  0.141865  0.180170
0000000100 1        0   0   0   0   0   0   0   1    0    0  0.160771  0.154306  0.164416  0.111621  0.099257
0000000101 2        0   0   0   0   0   0   0   1    0    1  0.170876  0.139765  0.253409  0.152870  0.192046
0000000110 2        0   0   0   0   0   0   0   1    1    0  0.171582  0.195181  0.171896  0.115580  0.228525
0000000111 3        0   0   0   0   0   0   0   1    1    1  0.153575  0.170719  0.157193  0.150281  0.170767
0000001000 1        0   0   0   0   0   0   1   0    0    0  0.065712  0.055235  0.120205  0.060541  0.043004
0000001001 2        0   0   0   0   0   0   1   0    0    1  0.069255  0.081904  0.170409  0.062216  0.146146
...>





Copied!







# --- Demo DataFrame ---
df = pd.DataFrame({
    "g0": [0, 0, 0, 1, 1, 1, 0, 1],
    "g1": [0, 0, 1, 0, 1, 1, 1, 0],
    "g2": [0, 1, 0, 0, 0, 1, 1, 1],
    "rep_1": [1.00, 1.10, 1.20, 1.30, np.nan, 1.50, 1.60, 1.70],
    "rep_2": [1.05, 1.12, np.nan, 1.28, 1.38, 1.48, 1.58, 1.68],
    "rep_3": [0.95, 1.08, 1.22, 1.33, 1.41, np.nan, 1.61, 1.69],
})

header("Demo DataFrame (first rows)")
display(df)

# --- Demo DataFrame ---
df = pd.DataFrame({
    "g0": [0, 0, 0, 1, 1, 1, 0, 1],
    "g1": [0, 0, 1, 0, 1, 1, 1, 0],
    "g2": [0, 1, 0, 0, 0, 1, 1, 1],
    "rep_1": [1.00, 1.10, 1.20, 1.30, np.nan, 1.50, 1.60, 1.70],
    "rep_2": [1.05, 1.12, np.nan, 1.28, 1.38, 1.48, 1.58, 1.68],
    "rep_3": [0.95, 1.08, 1.22, 1.33, 1.41, np.nan, 1.61, 1.69],
})

header("Demo DataFrame (first rows)")
display(df)

===========================
Demo DataFrame (first rows)
===========================





Copied!







# --- Demo DataFrame ---
df = pd.DataFrame({
    "g0": [0, 0, 0, 1, 1, 1, 0, 1],
    "g1": [0, 0, 1, 0, 1, 1, 1, 0],
    "g2": [0, 1, 0, 0, 0, 1, 1, 1],
    "rep_1": [1.00, 1.10, 1.20, 1.30, np.nan, 1.50, 1.60, 1.70],
    "rep_2": [1.05, 1.12, np.nan, 1.28, 1.38, 1.48, 1.58, 1.68],
    "rep_3": [0.95, 1.08, 1.22, 1.33, 1.41, np.nan, 1.61, 1.69],
})

header("Demo DataFrame (first rows)")
display(df)

# --- Demo DataFrame ---
df = pd.DataFrame({
    "g0": [0, 0, 0, 1, 1, 1, 0, 1],
    "g1": [0, 0, 1, 0, 1, 1, 1, 0],
    "g2": [0, 1, 0, 0, 0, 1, 1, 1],
    "rep_1": [1.00, 1.10, 1.20, 1.30, np.nan, 1.50, 1.60, 1.70],
    "rep_2": [1.05, 1.12, np.nan, 1.28, 1.38, 1.48, 1.58, 1.68],
    "rep_3": [0.95, 1.08, 1.22, 1.33, 1.41, np.nan, 1.61, 1.69],
})

header("Demo DataFrame (first rows)")
display(df)

===========================
Demo DataFrame (first rows)
===========================





Copied!







# --- Build a Landscape from the DataFrame ---
L = ep.Landscape.from_dataframe(df)  # N: total number of species -> inferred automatically
                                     # R: total number of replicates -> inferred automatically

# --- Build a Landscape from the DataFrame ---
L = ep.Landscape.from_dataframe(df)  # N: total number of species -> inferred automatically
                                     # R: total number of replicates -> inferred automatically





Copied!







# --- Build a Landscape from the DataFrame ---
L = ep.Landscape.from_dataframe(df)  # N: total number of species -> inferred automatically
                                     # R: total number of replicates -> inferred automatically

# --- Build a Landscape from the DataFrame ---
L = ep.Landscape.from_dataframe(df)  # N: total number of species -> inferred automatically
                                     # R: total number of replicates -> inferred automatically





Copied!







# Show basic properties of the dataset
header("Landscape summary")
print(f"N (dimensions):      {L.N}")       # Number of binary variables
print(f"R (replicates):      {L.R}")       # Number of experimental replicates
print(f"order:               {L.order}")   # State ordering ('lex' = lexicographic)
print(f"M (observed states): {L.M}")       # Number of observed states (rows)
print(f"Feature names:       {L.feature_names}")  # List of feature names
print(L.states)                            # Binary matrix (M × N)


# Show basic properties of the dataset
header("Landscape summary")
print(f"N (dimensions):      {L.N}")       # Number of binary variables
print(f"R (replicates):      {L.R}")       # Number of experimental replicates
print(f"order:               {L.order}")   # State ordering ('lex' = lexicographic)
print(f"M (observed states): {L.M}")       # Number of observed states (rows)
print(f"Feature names:       {L.feature_names}")  # List of feature names
print(L.states)                            # Binary matrix (M × N)

=================
Landscape summary
=================
N (dimensions):      3
R (replicates):      3
order:               lex
M (observed states): 8
Feature names:       ['g0', 'g1', 'g2']
[[0 0 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [1 1 0]
 [1 1 1]
 [0 1 1]
 [1 0 1]]





Copied!







# Show basic properties of the dataset
header("Landscape summary")
print(f"N (dimensions):      {L.N}")       # Number of binary variables
print(f"R (replicates):      {L.R}")       # Number of experimental replicates
print(f"order:               {L.order}")   # State ordering ('lex' = lexicographic)
print(f"M (observed states): {L.M}")       # Number of observed states (rows)
print(f"Feature names:       {L.feature_names}")  # List of feature names
print(L.states)                            # Binary matrix (M × N)


# Show basic properties of the dataset
header("Landscape summary")
print(f"N (dimensions):      {L.N}")       # Number of binary variables
print(f"R (replicates):      {L.R}")       # Number of experimental replicates
print(f"order:               {L.order}")   # State ordering ('lex' = lexicographic)
print(f"M (observed states): {L.M}")       # Number of observed states (rows)
print(f"Feature names:       {L.feature_names}")  # List of feature names
print(L.states)                            # Binary matrix (M × N)

=================
Landscape summary
=================
N (dimensions):      3
R (replicates):      3
order:               lex
M (observed states): 8
Feature names:       ['g0', 'g1', 'g2']
[[0 0 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [1 1 0]
 [1 1 1]
 [0 1 1]
 [1 0 1]]





Copied!







# Compute mean value per state, ignoring NaNs
mean = L.mean_over_replicates()
mean_df = pd.DataFrame({
    "State index": range(L.M),
    "Mean across replicates": mean
})
display(mean_df)

# Compute mean value per state, ignoring NaNs
mean = L.mean_over_replicates()
mean_df = pd.DataFrame({
    "State index": range(L.M),
    "Mean across replicates": mean
})
display(mean_df)





Copied!







# Compute mean value per state, ignoring NaNs
mean = L.mean_over_replicates()
mean_df = pd.DataFrame({
    "State index": range(L.M),
    "Mean across replicates": mean
})
display(mean_df)

# Compute mean value per state, ignoring NaNs
mean = L.mean_over_replicates()
mean_df = pd.DataFrame({
    "State index": range(L.M),
    "Mean across replicates": mean
})
display(mean_df)





Copied!







# --- Select a subset of replicate columns (keeps all states) ---
kept_replicates = [0, 2]  # zero-based indices of the replicates to keep
L_subset = L.select_replicates(kept_replicates)

# Convert to DataFrame for inspection. The replicate columns are labelled
# with the indices that were actually selected (rep_0, rep_2); numbering them
# 0..R-1 would show rep_0, rep_1 and hide which replicates were kept.
subset_df = pd.DataFrame(
    np.hstack([L_subset.states, L_subset.values]),
    columns=[f"s{i}" for i in range(L_subset.N)] +
            [f"rep_{j}" for j in kept_replicates]
)

print("Subset with replicate columns 0 and 2:")
display(subset_df)

# --- Select a subset of replicate columns (keeps all states) ---
kept_replicates = [0, 2]  # zero-based indices of the replicates to keep
L_subset = L.select_replicates(kept_replicates)

# Convert to DataFrame for inspection. The replicate columns are labelled
# with the indices that were actually selected (rep_0, rep_2); numbering them
# 0..R-1 would show rep_0, rep_1 and hide which replicates were kept.
subset_df = pd.DataFrame(
    np.hstack([L_subset.states, L_subset.values]),
    columns=[f"s{i}" for i in range(L_subset.N)] +
            [f"rep_{j}" for j in kept_replicates]
)

print("Subset with replicate columns 0 and 2:")
display(subset_df)

Subset with replicate columns 0 and 2:





Copied!







# --- Select a subset of replicate columns (keeps all states) ---
kept_replicates = [0, 2]  # zero-based indices of the replicates to keep
L_subset = L.select_replicates(kept_replicates)

# Convert to DataFrame for inspection. The replicate columns are labelled
# with the indices that were actually selected (rep_0, rep_2); numbering them
# 0..R-1 would show rep_0, rep_1 and hide which replicates were kept.
subset_df = pd.DataFrame(
    np.hstack([L_subset.states, L_subset.values]),
    columns=[f"s{i}" for i in range(L_subset.N)] +
            [f"rep_{j}" for j in kept_replicates]
)

print("Subset with replicate columns 0 and 2:")
display(subset_df)

# --- Select a subset of replicate columns (keeps all states) ---
kept_replicates = [0, 2]  # zero-based indices of the replicates to keep
L_subset = L.select_replicates(kept_replicates)

# Convert to DataFrame for inspection. The replicate columns are labelled
# with the indices that were actually selected (rep_0, rep_2); numbering them
# 0..R-1 would show rep_0, rep_1 and hide which replicates were kept.
subset_df = pd.DataFrame(
    np.hstack([L_subset.states, L_subset.values]),
    columns=[f"s{i}" for i in range(L_subset.N)] +
            [f"rep_{j}" for j in kept_replicates]
)

print("Subset with replicate columns 0 and 2:")
display(subset_df)

Subset with replicate columns 0 and 2:





Copied!







# --- Select a subset of states (rows) ---
# Example 1: Select by explicit indices
L_sel_idx = L.select_states([0, 3, 5])

# Example 2: Select only rows without NaNs in the first replicate
mask_valid = ~np.isnan(L.values[:, 0])
L_sel_mask = L.select_states(mask_valid)

# Convert one of them to DataFrame for display
sel_df = pd.DataFrame(
    np.hstack([L_sel_idx.states, L_sel_idx.values]),
    columns=[f"s{i}" for i in range(L_sel_idx.N)] +
            [f"rep_{j}" for j in range(L_sel_idx.R)]
)

print("Subset with selected state indices [0, 3, 5]:")
display(sel_df)

# --- Select a subset of states (rows) ---
# Example 1: Select by explicit indices
L_sel_idx = L.select_states([0, 3, 5])

# Example 2: Select only rows without NaNs in the first replicate
mask_valid = ~np.isnan(L.values[:, 0])
L_sel_mask = L.select_states(mask_valid)

# Convert one of them to DataFrame for display
sel_df = pd.DataFrame(
    np.hstack([L_sel_idx.states, L_sel_idx.values]),
    columns=[f"s{i}" for i in range(L_sel_idx.N)] +
            [f"rep_{j}" for j in range(L_sel_idx.R)]
)

print("Subset with selected state indices [0, 3, 5]:")
display(sel_df)

Subset with selected state indices [0, 3, 5]:





Copied!







# --- Select a subset of states (rows) ---
# Example 1: Select by explicit indices
L_sel_idx = L.select_states([0, 3, 5])

# Example 2: Select only rows without NaNs in the first replicate
mask_valid = ~np.isnan(L.values[:, 0])
L_sel_mask = L.select_states(mask_valid)

# Convert one of them to DataFrame for display
sel_df = pd.DataFrame(
    np.hstack([L_sel_idx.states, L_sel_idx.values]),
    columns=[f"s{i}" for i in range(L_sel_idx.N)] +
            [f"rep_{j}" for j in range(L_sel_idx.R)]
)

print("Subset with selected state indices [0, 3, 5]:")
display(sel_df)

# --- Select a subset of states (rows) ---
# Example 1: Select by explicit indices
L_sel_idx = L.select_states([0, 3, 5])

# Example 2: Select only rows without NaNs in the first replicate
mask_valid = ~np.isnan(L.values[:, 0])
L_sel_mask = L.select_states(mask_valid)

# Convert one of them to DataFrame for display
sel_df = pd.DataFrame(
    np.hstack([L_sel_idx.states, L_sel_idx.values]),
    columns=[f"s{i}" for i in range(L_sel_idx.N)] +
            [f"rep_{j}" for j in range(L_sel_idx.R)]
)

print("Subset with selected state indices [0, 3, 5]:")
display(sel_df)

Subset with selected state indices [0, 3, 5]:





Copied!







# --- Retrieve replicate values for a specific state ---
# Example 1: Pass the binary state as a list
vals = L.get_values([1, 0, 1])
print("Replicate values for state [1, 0, 1]:")
print(vals)

# Example 2: Retrieve only some replicates
vals_sub = L.get_values([1, 0, 1], replicates=[0, 1])
print("\nOnly first two replicates:")
print(vals_sub)

# Example 3: Return a labeled DataFrame
vals_df = L.get_values([1, 0, 1], as_dataframe=True)
display(vals_df)

# Example 4: Retrieve using integer encoding (binary 101 = 5)
print("\nBy integer encoding (5):")
print(L.get_values(5))

# --- Retrieve replicate values for a specific state ---
# Example 1: Pass the binary state as a list
vals = L.get_values([1, 0, 1])
print("Replicate values for state [1, 0, 1]:")
print(vals)

# Example 2: Retrieve only some replicates
vals_sub = L.get_values([1, 0, 1], replicates=[0, 1])
print("\nOnly first two replicates:")
print(vals_sub)

# Example 3: Return a labeled DataFrame
vals_df = L.get_values([1, 0, 1], as_dataframe=True)
display(vals_df)

# Example 4: Retrieve using integer encoding (binary 101 = 5)
print("\nBy integer encoding (5):")
print(L.get_values(5))

Replicate values for state [1, 0, 1]:
[[1.7  1.68 1.69]]

Only first two replicates:
[[1.7  1.68]]

By integer encoding (5):
[[1.7  1.68 1.69]]





Copied!







# --- Retrieve replicate values for a specific state ---
# Example 1: Pass the binary state as a list
vals = L.get_values([1, 0, 1])
print("Replicate values for state [1, 0, 1]:")
print(vals)

# Example 2: Retrieve only some replicates
vals_sub = L.get_values([1, 0, 1], replicates=[0, 1])
print("\nOnly first two replicates:")
print(vals_sub)

# Example 3: Return a labeled DataFrame
vals_df = L.get_values([1, 0, 1], as_dataframe=True)
display(vals_df)

# Example 4: Retrieve using integer encoding (binary 101 = 5)
print("\nBy integer encoding (5):")
print(L.get_values(5))

# --- Retrieve replicate values for a specific state ---
# Example 1: Pass the binary state as a list
vals = L.get_values([1, 0, 1])
print("Replicate values for state [1, 0, 1]:")
print(vals)

# Example 2: Retrieve only some replicates
vals_sub = L.get_values([1, 0, 1], replicates=[0, 1])
print("\nOnly first two replicates:")
print(vals_sub)

# Example 3: Return a labeled DataFrame
vals_df = L.get_values([1, 0, 1], as_dataframe=True)
display(vals_df)

# Example 4: Retrieve using integer encoding (binary 101 = 5)
print("\nBy integer encoding (5):")
print(L.get_values(5))

Replicate values for state [1, 0, 1]:
[[1.7  1.68 1.69]]

Only first two replicates:
[[1.7  1.68]]

By integer encoding (5):
[[1.7  1.68 1.69]]





Copied!







###############################################################
# 0 Create new demo dataset with three types of missing data  #
###############################################################

#State [1 1 1] has been removed
df_missing = pd.DataFrame({
    "g0": [0, 0, 0, 1, 1, 0, 1],
    "g1": [0, 0, 1, 0, 1, 1, 0],
    "g2": [0, 1, 0, 0, 0, 1, 1], 
    "rep1": [1.00, 1.10, 1.20, 1.30, np.nan, 1.60, 1.70],
    "rep2": [1.05, 1.12, np.nan, 1.28, np.nan, 1.58, 1.68],
    "rep3": [0.95, 1.08, 1.22, 1.33, np.nan,  1.61, 1.69],
})


header("Demo DataFrame with missing data")
display(df_missing)

#Load as a Landscape object
L_missing = ep.Landscape.from_dataframe(df_missing) 

#################################################
# 1 Strict filtering: remove rows with ANY NaN  #
#################################################

L_complete = L_missing.drop_rows_with_any_nan()
clean_df = pd.DataFrame(
    np.hstack([L_complete.states, L_complete.values]),
    columns=[f"g{i}" for i in range(L_complete.N)] +
            [f"rep_{j}" for j in range(L_complete.R)]
)


print("1-STRICT FILTERING: no missing replicates")
display(clean_df)

#####################################################
# 2 Permissive filtering: remove rows with ALL NaNs #
#####################################################

# Introduce one row that is completely NaN in all replicates
L_partial = L_missing.drop_rows_with_all_nan()
partial_df = pd.DataFrame(
    np.hstack([L_partial.states, L_partial.values]),
    columns=[f"g{i}" for i in range(L_partial.N)] +
            [f"rep_{j}" for j in range(L_partial.R)]
)

print("2-PERMISSIVE FILTERING: REMOVE ROWS WITH ALL NaNS")
display(partial_df)

###########################################################
# 3 Missing states: configurations not present in dataset #
###########################################################

print("3-MISSING STATES: CONFIGURATIONS NOT PRESENT IN DATASET")

missing = L_missing.missing_states()
print(f"\nMissing {len(missing)} states out of {2**L_missing.N} total:")
display(pd.DataFrame(missing, columns=[f"g{i}" for i in range(L_missing.N)]))

###############################################################
# 0 Create new demo dataset with three types of missing data  #
###############################################################

#State [1 1 1] has been removed
df_missing = pd.DataFrame({
    "g0": [0, 0, 0, 1, 1, 0, 1],
    "g1": [0, 0, 1, 0, 1, 1, 0],
    "g2": [0, 1, 0, 0, 0, 1, 1], 
    "rep1": [1.00, 1.10, 1.20, 1.30, np.nan, 1.60, 1.70],
    "rep2": [1.05, 1.12, np.nan, 1.28, np.nan, 1.58, 1.68],
    "rep3": [0.95, 1.08, 1.22, 1.33, np.nan,  1.61, 1.69],
})


header("Demo DataFrame with missing data")
display(df_missing)

#Load as a Landscape object
L_missing = ep.Landscape.from_dataframe(df_missing) 

#################################################
# 1 Strict filtering: remove rows with ANY NaN  #
#################################################

L_complete = L_missing.drop_rows_with_any_nan()
clean_df = pd.DataFrame(
    np.hstack([L_complete.states, L_complete.values]),
    columns=[f"g{i}" for i in range(L_complete.N)] +
            [f"rep_{j}" for j in range(L_complete.R)]
)


print("1-STRICT FILTERING: no missing replicates")
display(clean_df)

#####################################################
# 2 Permissive filtering: remove rows with ALL NaNs #
#####################################################

# Introduce one row that is completely NaN in all replicates
L_partial = L_missing.drop_rows_with_all_nan()
partial_df = pd.DataFrame(
    np.hstack([L_partial.states, L_partial.values]),
    columns=[f"g{i}" for i in range(L_partial.N)] +
            [f"rep_{j}" for j in range(L_partial.R)]
)

print("2-PERMISSIVE FILTERING: REMOVE ROWS WITH ALL NaNS")
display(partial_df)

###########################################################
# 3 Missing states: configurations not present in dataset #
###########################################################

print("3-MISSING STATES: CONFIGURATIONS NOT PRESENT IN DATASET")

missing = L_missing.missing_states()
print(f"\nMissing {len(missing)} states out of {2**L_missing.N} total:")
display(pd.DataFrame(missing, columns=[f"g{i}" for i in range(L_missing.N)]))

================================
Demo DataFrame with missing data
================================

1-STRICT FILTERING: no missing replicates

2-PERMISSIVE FILTERING: REMOVE ROWS WITH ALL NaNS

3-MISSING STATES: CONFIGURATIONS NOT PRESENT IN DATASET

Missing 1 states out of 8 total:





Copied!







###############################################################
# 0 Create new demo dataset with three types of missing data  #
###############################################################

#State [1 1 1] has been removed
df_missing = pd.DataFrame({
    "g0": [0, 0, 0, 1, 1, 0, 1],
    "g1": [0, 0, 1, 0, 1, 1, 0],
    "g2": [0, 1, 0, 0, 0, 1, 1], 
    "rep1": [1.00, 1.10, 1.20, 1.30, np.nan, 1.60, 1.70],
    "rep2": [1.05, 1.12, np.nan, 1.28, np.nan, 1.58, 1.68],
    "rep3": [0.95, 1.08, 1.22, 1.33, np.nan,  1.61, 1.69],
})


header("Demo DataFrame with missing data")
display(df_missing)

#Load as a Landscape object
L_missing = ep.Landscape.from_dataframe(df_missing) 

#################################################
# 1 Strict filtering: remove rows with ANY NaN  #
#################################################

L_complete = L_missing.drop_rows_with_any_nan()
clean_df = pd.DataFrame(
    np.hstack([L_complete.states, L_complete.values]),
    columns=[f"g{i}" for i in range(L_complete.N)] +
            [f"rep_{j}" for j in range(L_complete.R)]
)


print("1-STRICT FILTERING: no missing replicates")
display(clean_df)

#####################################################
# 2 Permissive filtering: remove rows with ALL NaNs #
#####################################################

# Introduce one row that is completely NaN in all replicates
L_partial = L_missing.drop_rows_with_all_nan()
partial_df = pd.DataFrame(
    np.hstack([L_partial.states, L_partial.values]),
    columns=[f"g{i}" for i in range(L_partial.N)] +
            [f"rep_{j}" for j in range(L_partial.R)]
)

print("2-PERMISSIVE FILTERING: REMOVE ROWS WITH ALL NaNS")
display(partial_df)

###########################################################
# 3 Missing states: configurations not present in dataset #
###########################################################

print("3-MISSING STATES: CONFIGURATIONS NOT PRESENT IN DATASET")

missing = L_missing.missing_states()
print(f"\nMissing {len(missing)} states out of {2**L_missing.N} total:")
display(pd.DataFrame(missing, columns=[f"g{i}" for i in range(L_missing.N)]))

###############################################################
# 0 Create new demo dataset with three types of missing data  #
###############################################################

#State [1 1 1] has been removed
df_missing = pd.DataFrame({
    "g0": [0, 0, 0, 1, 1, 0, 1],
    "g1": [0, 0, 1, 0, 1, 1, 0],
    "g2": [0, 1, 0, 0, 0, 1, 1], 
    "rep1": [1.00, 1.10, 1.20, 1.30, np.nan, 1.60, 1.70],
    "rep2": [1.05, 1.12, np.nan, 1.28, np.nan, 1.58, 1.68],
    "rep3": [0.95, 1.08, 1.22, 1.33, np.nan,  1.61, 1.69],
})


header("Demo DataFrame with missing data")
display(df_missing)

#Load as a Landscape object
L_missing = ep.Landscape.from_dataframe(df_missing) 

#################################################
# 1 Strict filtering: remove rows with ANY NaN  #
#################################################

L_complete = L_missing.drop_rows_with_any_nan()
clean_df = pd.DataFrame(
    np.hstack([L_complete.states, L_complete.values]),
    columns=[f"g{i}" for i in range(L_complete.N)] +
            [f"rep_{j}" for j in range(L_complete.R)]
)


print("1-STRICT FILTERING: no missing replicates")
display(clean_df)

#####################################################
# 2 Permissive filtering: remove rows with ALL NaNs #
#####################################################

# Introduce one row that is completely NaN in all replicates
L_partial = L_missing.drop_rows_with_all_nan()
partial_df = pd.DataFrame(
    np.hstack([L_partial.states, L_partial.values]),
    columns=[f"g{i}" for i in range(L_partial.N)] +
            [f"rep_{j}" for j in range(L_partial.R)]
)

print("2-PERMISSIVE FILTERING: REMOVE ROWS WITH ALL NaNS")
display(partial_df)

###########################################################
# 3 Missing states: configurations not present in dataset #
###########################################################

print("3-MISSING STATES: CONFIGURATIONS NOT PRESENT IN DATASET")

missing = L_missing.missing_states()
print(f"\nMissing {len(missing)} states out of {2**L_missing.N} total:")
display(pd.DataFrame(missing, columns=[f"g{i}" for i in range(L_missing.N)]))

================================
Demo DataFrame with missing data
================================

1-STRICT FILTERING: no missing replicates

2-PERMISSIVE FILTERING: REMOVE ROWS WITH ALL NaNS

3-MISSING STATES: CONFIGURATIONS NOT PRESENT IN DATASET

Missing 1 states out of 8 total:

	State index	Mean across replicates
0	0	1.000000
1	1	1.100000
2	2	1.210000
3	3	1.303333
4	4	1.395000
5	5	1.490000
6	6	1.596667
7	7	1.690000

Keys	Action
`?`	Open this help
`n`	Next page
`p`	Previous page
`s`	Search

1. Basic usage¶

1.1. Importing the required packages¶

1.2. Loading a Binary Landscape from file (recommended)¶

1.3. Building a Binary Landscape from a DataFrame¶

1.4. Inspecting Basic Properties¶

1.5. Useful Methods¶

1.6. Filtering strategies for replicate NaNs¶

	g0	g1	g2	rep_1	rep_2	rep_3
0	0	0	0	1.0	1.05	0.95
1	0	0	1	1.1	1.12	1.08
2	0	1	0	1.2	NaN	1.22
3	1	0	0	1.3	1.28	1.33
4	1	1	0	NaN	1.38	1.41
5	1	1	1	1.5	1.48	NaN
6	0	1	1	1.6	1.58	1.61
7	1	0	1	1.7	1.68	1.69