# This cell isn't exported to the .py file, so define sys.argv here if running
# in the notebook rather than as a .py script on e.g. a cluster.
# This is also where to change whether the light is blue or white if you want
# to run in the notebook.
sys.argv = [
    "script_name",
    "../outputs/constraint_scan/constraint_scan_results_blue.csv",
    "../models/4_stage_GC.json",
    "../outputs/constraint_scan/constraints_df.csv",
    "blue",
    "6",
]
Running Constraint Scan

Remember to run for both BLUE and WHITE light.

This notebook can either be run as a notebook, or, via nbdev, exported as a Python script that can be run from the command line for easier running on e.g. a cluster. It lives as a notebook in the nbs directory and is exported as a .py file in the mmon-gcm library. When run as a script, the values defined in the cell above arrive on the command line instead, as in the sketch below.
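A minimal sketch of the argument unpacking the exported script presumably performs; results_path and num_batches are the only names confirmed by later cells, the others are illustrative:

# Hedged sketch of the argument unpacking; only results_path and num_batches
# appear in later cells, the other names here are illustrative.
_, results_path, model_path, constraints_path, light_colour, num_batches = sys.argv
num_batches = int(num_batches)  # passed on the command line as a string, e.g. "6"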
If you are running from the notebook you'll want to comment out the next cell! It's there so that the nbdev tests, which execute the entire notebook, don't take too long. It isn't exported, so running as a script is unaffected.
# test = True
# constraints = constraints.iloc[:10]  # so tests don't take too long
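The constraints DataFrame itself is built earlier in the notebook; a minimal sketch of what that step presumably looks like, assuming the constraint sets are read from the constraints_df.csv path passed via sys.argv and that the file was saved with its index as the first column:

import pandas as pd

# Hedged sketch: load the sampled constraint sets from the CSV path given in
# sys.argv above; index_col=0 is an assumption about how the file was saved.
constraints = pd.read_csv("../outputs/constraint_scan/constraints_df.csv", index_col=0)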
constraints.head()
|   | P_abs | T_l | A_l | V_gc_ind | FqFm | R_ch | R_ch_vol | L_air | L_epidermis | Vac_frac | ... | R | N_gcs | n | m | r | s | C_apo | A_closed | A_open | ATPase |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.815093 | 0.000194 | 1.0 | 2.262641e-12 | 0.809297 | 0.179970 | 0.191527 | 0.240879 | 0.214860 | 0.858645 | ... | 0.08205 | 4.484392e+08 | 1.933548 | 0.992641 | 7.886553e-14 | 1.678654e-13 | 0.033679 | 2.321339 | 11.318979 | 10.816668 |
| 1 | 0.923180 | 0.000190 | 1.0 | 1.250880e-12 | 0.889162 | 0.093534 | 0.195869 | 0.337197 | 0.196606 | 0.837767 | ... | 0.08205 | 1.064044e+09 | 2.025073 | 0.946228 | 5.400144e-14 | 1.664063e-13 | 0.023524 | 2.500598 | 9.754649 | 0.549826 |
| 2 | 0.830507 | 0.000220 | 1.0 | 5.035745e-13 | 0.821060 | 0.167889 | 0.204824 | 0.331556 | 0.205674 | 0.816618 | ... | 0.08205 | 5.758277e+08 | 2.141889 | 0.972835 | 6.579620e-14 | 2.457118e-13 | 0.034062 | 2.802180 | 3.338120 | 7.823891 |
| 3 | 0.880998 | 0.000192 | 1.0 | 8.629192e-13 | 0.866582 | 0.051244 | 0.204472 | 0.309538 | 0.169957 | 0.813726 | ... | 0.08205 | 3.851195e+08 | 2.077401 | 0.940848 | 5.747590e-14 | 1.515328e-13 | 0.029770 | 3.399462 | 9.936390 | 12.606738 |
| 4 | 0.915597 | 0.000220 | 1.0 | 7.391447e-13 | 0.846358 | 0.059969 | 0.193449 | 0.352066 | 0.238671 | 0.810491 | ... | 0.08205 | 1.046353e+09 | 2.396012 | 0.817798 | 7.654181e-14 | 1.652973e-13 | 0.028420 | 3.305233 | 7.650706 | 10.970481 |

5 rows × 21 columns
print(f"Number of constraints = {len(constraints.index)}")
Number of constraints = 968
Breakdown into batches of constraints
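The cell below only recombines per-batch output files: the scan itself, run elsewhere, is assumed to have split the constraint sets into num_batches chunks and written each chunk's results to a numbered CSV. As a purely illustrative sketch of the naming scheme the combining cell relies on (run_scan is a hypothetical stand-in, not a function from this notebook or mmon-gcm):

import numpy as np

# Hypothetical sketch: split the constraint sets into num_batches chunks and
# save each chunk's results using the "<stem>_<batch>.csv" pattern that the
# combining cell below looks for. run_scan is a made-up placeholder.
for i, batch in enumerate(np.array_split(constraints, int(num_batches)), start=1):
    batch_results = run_scan(batch)  # hypothetical per-batch solve
    batch_results.to_csv(f"{results_path[:-4]}_{i}.csv")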
if os.path.exists(f"{results_path[:-4]}_{num_batches}.csv"):
    directory_path = os.path.dirname(results_path)

    # Get a list of all files in the directory
    all_files = os.listdir(directory_path)

    # Filter the list to include only the per-batch results files
    csv_files = [file for file in all_files if file.startswith(f"{results_path[:-4]}_".rsplit("/")[-1])]

    # Sort the list numerically by batch number, using a lambda function and
    # underscore as separator, e.g. "constraint_scan_results_blue_12.csv" -> 12
    csv_files = sorted(csv_files, key=lambda x: int(x.rsplit('_')[-1].split('.')[0]))

    # Initialize an empty list to store DataFrames
    dfs = []

    # Read each CSV file into a DataFrame and append it to the list
    for csv_file in csv_files:
        file_path = os.path.join(directory_path, csv_file)
        df = pd.read_csv(file_path)
        dfs.append(df)

    # Concatenate all DataFrames into a single DataFrame
    combined_df = pd.concat(dfs, ignore_index=True)

    # Remove the 'Unnamed: 0' index column left over from saving each batch
    if 'Unnamed: 0' in combined_df.columns:
        combined_df = combined_df.drop('Unnamed: 0', axis=1)

    # Save the combined DataFrame to the results path
    combined_df.to_csv(results_path)

    # Delete the per-batch files now that they have been combined
    for csv_file in list(csv_files):
        file_path = os.path.join(directory_path, csv_file)
        if os.path.exists(file_path):
            os.remove(file_path)
combined_df.shape
(968, 7101)
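The 968 rows match the number of constraint sets counted above, so every batch made it into the combined results file.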