# Copyright Mirela Andronescu 2008
# Sample options file for CGlearn.pl
# Comments are lines starting with #

######################################################################
# Most important input options
######################################################################

Executable that creates the training and test data sets: tools/add_initial_predictions_simfold
# The name of the executable used to obtain the training and test data sets.
# Use none if no such executable is provided, in which case the training set
#   (and optionally the test set) has to be provided.

Training file: data/TRA_StructFull-max500-fold1.txt
# Other examples: data/TRA_151Rfam_Tco_MFE_fm363.txt
# must contain predictions with the initial parameter set

Training filename: TRA-SF1
# Other examples: TRA-151Rfam
# the "Training filename" is a friendly short name used for the new directory name only

Number of iterations {50|100|...}: 2
# the number of CG iterations
# should be at least 10, usually 50 is enough

Bounds parameter B {1K|10K|0.8%|inf|...}: 1K
# lower and upper bound on the parameters
# if you don't want any bounds, type inf (for infinity)

Thermo XML file: data/RNA-thermo-db_v1.3.xml
Thermo XML filename: THERMO1.3
# The XML file used for the thermodynamic set.
# This parameter is ignored if the weight of the thermo set is 0.

Executable that creates the thermodynamic constraints: tools/create_thermo_constraints_simfold
# The name of the executable that creates the thermodynamic constraints

Weight of the thermodynamic set lambda {>=0}: 200
# In the ISMB 2007 paper, this was between 0 and 1, but now it can be any number in [0, inf)
# It is 0 if no thermodynamic set is included

Use published errors: 0
# 1 or 0, for whether or not we want to use the published errors in the precision of the thermo set.
# In my experience, 1 didn't give good results.

Add regularizer: 2
# 0 = don't add any regularizer
# 1 = add lasso regularizer, i.e. q=1, no feature similarity
# 2 = add ridge regularizer, i.e. q=2, no feature similarity
# 3 = add regularizer with feature similarity
# 3 is not implemented yet

Regularizer mean file: data/turner_parameters_fm363_constrdangles.txt
# A file with the mean values for each parameter
# If we want a regularizer around 0, this file could be data/zero_mean.txt

Regularizer mean filename: T99
# A short name for the regularizer mean file above.
# For mean values around the Turner99 parameters: T99D0, or just T99
# For a zero mean: 0

Regularizer bound eta: 0.6
# If regularizer is 1 or 2, this is the value of eta, i.e. the bound of the regularizer constraint
# Could be changed to an external file with different variance for each parameter

Use a max margin approach: 0
# 0 = CG
# 1 = DIM-CG
# 2 = LAM-CG
# If it is 1, the constraints are equalities instead of inequalities; no other change is made.
#   That means I try to maximize the difference between the known structure and other structures.
# If it is 2, it's the loss-augmented CG.
# If it is 0, I use inequalities, as explained in the ISMB07 paper.

Validation file: none
# If a validation file is given, the accuracy of the current parameters is measured on it at each iteration,
#   and at the end the parameters that give the best results on the validation file are chosen.

Test file: data/TES_StructFull-max500-fold1.txt
# the file I test on at the end of the iterations
# must be in the same format as the training file
# can be none

Initial params file: data/turner_parameters_fm363_constrdangles.txt
# The file with the initial parameters

Fixparams file: data/params_fix_205_259.txt
# A file which contains fixed values for some parameters, or the word 
#   "variable" for the non-fixed parameters. 
# For example data/params_fix_fitted_thermo_l2norm.txt contains values
#   obtained by doing linear regression on the thermodynamic set only.
# Another example is when we want to fix the dangling end parameters
#   to the Turner99 values or to 0.
# type none if no parameters are fixed

Fixparams filename: FIX-205-259
# a friendly name used for the new directory name only

Additional constraints file: data/constraints_dangling_ends_fm363.txt
# A file which contains general constraints.
# For example, in the 363-parameters model, we want the 3' dangling ends
#   to be less than the 5' dangling ends.
# Specific bounds on the parameters can be given here too.
#   for example the dangling ends have to be negative
# Example of such file: data/constraints_dangling_ends_fm363.txt
# type none if no additional constraints are given

Additional constraints filename: CON-DL
# A friendly name used for the new directory name only

Executable that creates the structural constraints: tools/create_structural_constraints_simfold
# The name of the executable that creates the structural constraints

Executable that predicts and analyses results: tools/predict_and_analyse_results_simfold
# The name of the executable that predicts and analyses the results of the new parameters

######################################################################
# Other options, I suggest you leave these unchanged 
######################################################################

L norm used for the objective function {1|2}: 2
# default is 2

Normalisation criterion {n1|n2}: n2
# n1: 1/D, n2: 1/N * 1/numstr (in the ISMB paper it's n2)

Model is simplified {1|0}: 0
# 1 if we don't consider internal loops 1x1, 1x2 and 2x2 separately
# 0 if we consider the model described in the ISMB paper (363 parameters)

Do perturbation {0|1}: 0
# 0 means the algorithm stops if no more constraints can be generated
# 1 means the latest parameters are randomly perturbed by at most 1 kcal/mol,
#      and then the iterations continue.

Minimum length sum: 4000
# This is the minimum length sum of the sequences used within the same SGE job
# For simfold, should be at least 3000
# For hotknots should be 200 or 300.

Cluster for cplex: icics
# The cluster where to run cplex. Can be arrow or icics (does not include beta, 
#   because we want a single type of machine, in case we want to measure runtime)
# WARNING: don't use the arrow cluster before you get permission!

Cluster for prediction: icics
# The cluster where to run the predictions. Can be arrow or icics (includes beta)
# WARNING: don't use the arrow cluster before you get permission!