backend {
  default = "PBS"
  providers {

    Local {
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
      config {
        # Not yet verified whether this parameter actually takes effect.
        concurrent-job-limit = 384
        runtime-attributes = """
        String? docker_mount_str
        String? docker_user
        String? docker
        """
        submit = "/usr/bin/env bash ${script}"
        submit-docker = """
        docker run --network=host --entrypoint="" \
          --rm -i --cidfile ${docker_cid} \
          -u $(id -u):$(id -g) ${default='' docker_mount_str} \
          -v ${cwd}:${cwd} -v ${cwd}:${docker_cwd} \
          ${docker} ${job_shell} ${script}
        """
      }
    }

    PBS {
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
      config {
        // mem, pbs_walltime and nodes are declared as Strings so that they can be evaluated as
        // expressions in the runtime block.
        // https://github.com/broadinstitute/cromwell/issues/1702
        runtime-attributes = """
        String mem = "8Gb"
        Int cpus = 2
        String? queue
        String pbs_walltime = "24:00:00"
        String nodes = "1"
        String? docker_mount_str
        String? docker
        """
        pbs_mem = ""
        exit-code-timeout-seconds = 20
        script-epilogue = ""
        concurrent-job-limit = 384

        //
        // By default, PBS copies stdout and stderr from the execution host to the submission host
        // only after the job is done. This doesn't suit Cromwell out of the box, which assumes that
        // once the rc file exists, the output stream files are immediately ready. So we specify
        // "-k oe" to qsub, which keeps the stdout and stderr streams on the execution node in
        // $PBS_JOBDIR - by default the user's $HOME, which we assume is shared and available to
        // both execution and submission hosts - and insert some shell commands into the generated
        // script to move those files to the locations expected by Cromwell /before/ the rc file
        // is created.
        //
        submit = """
        set -ex
        pbs_mem=$(echo ${mem}|tr '[:upper:]' '[:lower:]')
        cd ${cwd}/execution
        qsub \
          ${"-l nodes=" + nodes}${":ppn=" + cpus} \
          "-l mem="$pbs_mem \
          ${"-l walltime=" + pbs_walltime} \
          ${"-q " + queue} \
          -o ${out} \
          -e ${err} \
          -N ${job_name} \
          -W umask=0007 \
          ${script}
        """

        submit-docker = """
        pbs_mem=$(echo ${mem}|tr '[:upper:]' '[:lower:]')
        echo 'docker run --network=host --entrypoint="" --rm -i --cidfile ${docker_cid} -u $(id -u):$(id -g) ${default="" docker_mount_str} -v ${cwd}:${cwd} -v ${cwd}:${docker_cwd} ${docker} ${job_shell} ${script}' > ${script}_wrapper.sh
        cd ${cwd}/execution
        qsub \
          ${"-l nodes=" + nodes}${":ppn=" + cpus} \
          "-l mem="$pbs_mem \
          ${"-l walltime=" + pbs_walltime} \
          ${"-q " + queue} \
          -o ${out} \
          -e ${err} \
          -N ${job_name} \
          -W umask=0007 \
          ${script}_wrapper.sh
        """

        kill = "qdel ${job_id}; if [ ! -z ${docker_cid} ]; then docker stop `cat ${docker_cid}`; fi"
        check-alive = "qstat ${job_id}"
        job-id-regex = "(\\d+)\\.\\w+"

        filesystems {
          local {
            localization: [ "hard-link", "cached-copy", "copy" ]
            caching {
              # When copying a cached result, what type of file duplication should occur.
              # Possible values: "hard-link", "soft-link", "copy", "cached-copy".
              # For more information see: https://cromwell.readthedocs.io/en/stable/backends/HPC/#shared-filesystem
              # Attempted in the order listed below:
              duplication-strategy: [ "hard-link", "cached-copy", "copy" ]

              # Possible values: md5, xxh64, fingerprint, path, path+modtime
              # For an extended explanation see: https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching
              # "md5" will compute an md5 hash of the file content.
              # "xxh64" will compute an xxh64 hash of the file content. Much faster than md5.
              # "fingerprint" will take the last modified time, the size, and an xxh64 hash of the first 10 MB to create a file fingerprint.
              #   This strategy will only be effective if the duplication-strategy (above) is set to "hard-link", as copying changes the last modified time.
              # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link",
              #   in order to allow the original file path to be hashed.
              # "path+modtime" will compute an md5 hash of the file path and the last modified time. The same conditions as for "path" apply here.
              # Default: "md5"
              hashing-strategy: "fingerprint"

              # When the 'fingerprint' strategy is used, set how much of the beginning of the file is read as the fingerprint.
              # If the file is smaller than this size, the entire file will be read.
              # Default: 10485760 (10 MB).
              fingerprint-size: 10485760

              # When true, will check whether a sibling file with the same name and a .md5 extension exists, and if it does, use the content of that file as the hash.
              # If false, or if the .md5 file does not exist, will proceed with the hashing strategy defined above.
              # Default: false
              check-sibling-md5: false
            }
          }
        }
      }
    }
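    # The runtime-attributes declared for the PBS provider above are filled in from each task's
    # `runtime` block. A minimal WDL sketch is given below for orientation only; the task name,
    # image and resource values are illustrative and are not part of this configuration:
    #
    #   task example_pbs_task {
    #     command { echo "hello" }
    #     runtime {
    #       mem: "16Gb"               # lower-cased by the submit script into `-l mem=16gb`
    #       cpus: 4                   # combined with `nodes` into `-l nodes=1:ppn=4`
    #       pbs_walltime: "48:00:00"  # becomes `-l walltime=48:00:00`
    #       queue: "batch"            # becomes `-q batch`; dropped when unset
    #       docker: "ubuntu:20.04"    # optional; when set, submit-docker is used instead of submit
    #     }
    #   }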
    SLURM {
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
      config {
        # Not yet verified whether this parameter actually takes effect.
        concurrent-job-limit = 384
        runtime-attributes = """
        Int runtime_minutes = 600
        Int cpus = 1
        Int requested_memory_mb_per_core = 1000
        String? queue
        String? mem
        String? docker_mount_str
        String? docker_user
        String? docker
        """
        submit = """
        sbatch \
          -J ${job_name} \
          -D ${cwd} \
          -o ${out} \
          -e ${err} \
          -t ${runtime_minutes} \
          ${"-c " + cpus} \
          --mem-per-cpu=${requested_memory_mb_per_core} \
          ${"--mem " + mem} \
          ${"-p " + queue} \
          --wrap "/bin/bash ${script}"
        """
        submit-docker = """
        sbatch \
          -J ${job_name} \
          -D ${cwd} \
          -o ${cwd}/execution/stdout \
          -e ${cwd}/execution/stderr \
          -t ${runtime_minutes} \
          ${"-c " + cpus} \
          --mem-per-cpu=${requested_memory_mb_per_core} \
          ${"--mem " + mem} \
          ${"-p " + queue} \
          --wrap "docker run --network=host --entrypoint="" --rm -i --cidfile ${docker_cid} -u $(id -u):$(id -g) ${default='' docker_mount_str} -v ${cwd}:${cwd} -v ${cwd}:${docker_cwd} ${docker} ${job_shell} ${script}"
        # --wrap "docker run ${default='' docker_mount_str} -v ${cwd}:${cwd} -v ${cwd}:${docker_cwd} ${docker} ${job_shell} ${script}"
        """
        kill = "scancel ${job_id}; docker stop `cat ${docker_cid}`"
        check-alive = "squeue -j ${job_id}"
        job-id-regex = "Submitted batch job (\\d+).*"

        filesystems {
          local {
            localization: [ "hard-link", "cached-copy", "copy" ]
            caching {
              # When copying a cached result, what type of file duplication should occur.
              # Possible values: "hard-link", "soft-link", "copy", "cached-copy".
              # For more information see: https://cromwell.readthedocs.io/en/stable/backends/HPC/#shared-filesystem
              # Attempted in the order listed below:
              duplication-strategy: [ "hard-link", "cached-copy", "copy" ]

              # Possible values: md5, xxh64, fingerprint, path, path+modtime
              # For an extended explanation see: https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching
              # "md5" will compute an md5 hash of the file content.
              # "xxh64" will compute an xxh64 hash of the file content. Much faster than md5.
              # "fingerprint" will take the last modified time, the size, and an xxh64 hash of the first 10 MB to create a file fingerprint.
              #   This strategy will only be effective if the duplication-strategy (above) is set to "hard-link", as copying changes the last modified time.
              # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link",
              #   in order to allow the original file path to be hashed.
              # "path+modtime" will compute an md5 hash of the file path and the last modified time. The same conditions as for "path" apply here.
              # Default: "md5"
              hashing-strategy: "fingerprint"

              # When the 'fingerprint' strategy is used, set how much of the beginning of the file is read as the fingerprint.
              # If the file is smaller than this size, the entire file will be read.
              # Default: 10485760 (10 MB).
              fingerprint-size: 10485760

              # When true, will check whether a sibling file with the same name and a .md5 extension exists, and if it does, use the content of that file as the hash.
              # If false, or if the .md5 file does not exist, will proceed with the hashing strategy defined above.
              # Default: false
              check-sibling-md5: false
            }
          }
        }
      }
    }
  }
}
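# The default backend above is "PBS". A single workflow can be routed to one of the other
# providers defined in this file by passing a workflow options JSON at submission time.
# A minimal sketch, assuming a hypothetical options file name:
#
#   # options.slurm.json
#   {
#     "backend": "SLURM"
#   }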
workflow-options {
  workflow_failure_mode = "NoNewCalls"
  write_to_cache = true
  read_from_cache = true
}

docker {
  hash-lookup {
    // /!\ Attention /!\
    // If you disable this, call caching will be disabled for jobs with floating docker tags!
    DOCKER_HASH_LOOKUP

    // Set this to match your available quota against the Google Container Engine API
    // gcr-api-queries-per-100-seconds = 1000

    // Time in minutes before an entry expires from the docker hashes cache and needs to be fetched again
    cache-entry-ttl = "20 minutes"

    // Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache.
    cache-size = 200

    // How docker hashes should be looked up. Possible values are "local" and "remote".
    // "local": look up hashes on the local docker daemon using the CLI
    // "remote": look up hashes on Docker Hub and GCR
    method = "local"
  }
}

call-caching {
  enabled = true
  # In a multi-user environment this should be false so that unauthorized users don't invalidate results for authorized users.
  invalidate-bad-cache-results = false
}

system {
  job-rate-control {
    jobs = 10
    per = 10 seconds
  }
  # max-workflow-launch-count = 5
  # max-retries = 50
  # new-workflow-poll-rate = 60

  io {
    number-of-attempts = 10

    # Amount of time after which an I/O operation will time out if no response has been received.
    # Note that a timeout may result in a workflow failure, so be careful not to set the timeout too low.
    # Unless you start experiencing timeouts under very heavy load, there should be no reason to change the default values.
    timeout {
      default = 3 minutes
      # Copy can be a time-consuming operation, so its timeout can be set separately.
      copy = 1 hour
    }
  }

  # If 'true', a SIGINT will trigger Cromwell to attempt to abort all currently running jobs before exiting.
  # Defaults to false in server mode, and true in run mode.
  # abort-jobs-on-terminate = false

  # If 'true', a SIGTERM or SIGINT will trigger Cromwell to attempt a graceful shutdown in server mode,
  # in particular clearing up all queued database writes before letting the JVM shut down.
  # The shutdown is a multi-phase process, each phase having its own configurable timeout. See the Dev Wiki for more details.
  # graceful-server-shutdown = true

  # If 'true', then when Cromwell starts up, it tries to restart incomplete workflows.
  # workflow-restart = true

  # Cromwell will cap the number of running workflows at N.
  max-concurrent-workflows = 100

  # Cromwell will launch up to N submitted workflows at a time, regardless of how many open workflow slots exist.
  max-workflow-launch-count = 100
}
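# A minimal sketch of launching Cromwell in single-workflow ("run") mode against this file;
# the jar version and the workflow/inputs/options file names are placeholders, not part of this configuration:
#
#   java -Dconfig.file=/path/to/this.conf -jar cromwell-<version>.jar \
#     run workflow.wdl --inputs inputs.json --options options.json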