CMS Scale Test Setup

As described in #4490, CMS needs a wide-area glidein pool with 200,000 cores. We have a test setup that simulates a large pool to see how we can scale to that level. We use only a fraction of the normal hardware for execute nodes by running many startds per core and running sleep jobs that don't consume CPU or memory.

This page documents both the HTCondor configuration (i.e. changes relative to the default RPM configuration) and any Linux kernel parameter tuning required. It is divided into three sections: configs/tunings on the central manager, configs/tunings required on the submit nodes, and configs/tunings on the execute machines.

StartD

UPDATE_INTERVAL=$RANDOM_INTEGER(540, 740, 1)

Central Manager

The central managers, test-012.t2.ucsd.edu and e143.chtc.wisc.edu, run in high-availability (HA) mode. They have 128 GB of memory and 32 cores. They have collector trees with 200 child collectors.

Central Manager Configuration:

  • System Configurations:
    net.core.rmem_max = 10000000 (via sysctl)
    renice -5 applied to the main collector process
    

  • 00_gwms_general.config
    CONDOR_HOST=$(FULL_HOSTNAME)
    UID_DOMAIN=$(FULL_HOSTNAME)
    FILESYSTEM_DOMAIN=$(FULL_HOSTNAME)
    LOCK = $(LOG)
    DAEMON_LIST   = MASTER
    SEC_DAEMON_SESSION_DURATION = 50000
    SEC_DEFAULT_AUTHENTICATION = REQUIRED
    SEC_DEFAULT_AUTHENTICATION_METHODS = FS,GSI
    SEC_READ_AUTHENTICATION    = OPTIONAL
    SEC_CLIENT_AUTHENTICATION  = OPTIONAL
    DENY_WRITE         = anonymous@*
    DENY_ADMINISTRATOR = anonymous@*
    DENY_DAEMON        = anonymous@*
    DENY_NEGOTIATOR    = anonymous@*
    DENY_CLIENT        = anonymous@*
    SEC_DEFAULT_ENCRYPTION = OPTIONAL
    SEC_DEFAULT_INTEGRITY = REQUIRED
    SEC_READ_INTEGRITY = OPTIONAL
    SEC_CLIENT_INTEGRITY = OPTIONAL
    SEC_READ_ENCRYPTION = OPTIONAL
    SEC_CLIENT_ENCRYPTION = OPTIONAL
    HOSTALLOW_WRITE = *
    ALLOW_WRITE = $(HOSTALLOW_WRITE)
    

  • 01_gwms_collectors.config
    COLLECTOR_NAME = frontend_service
    COLLECTOR_HOST = $(CONDOR_HOST)
    COLLECTOR.USE_VOMS_ATTRIBUTES = False
    COLLECTOR_MAX_FILE_DESCRIPTORS=80000
    SCHEDD_MAX_FILE_DESCRIPTORS = 80000
    SHARED_PORT_MAX_FILE_DESCRIPTORS = 80000
    DAEMON_LIST   = $(DAEMON_LIST),  COLLECTOR, NEGOTIATOR
    NEGOTIATOR_POST_JOB_RANK = MY.LastHeardFrom
    NEGOTIATOR_INTERVAL = 60
    NEGOTIATOR_MAX_TIME_PER_SUBMITTER=60
    NEGOTIATOR_MAX_TIME_PER_PIESPIN=20
    PREEMPTION_REQUIREMENTS = False
    NEGOTIATOR_INFORM_STARTD = False
    NEGOTIATOR.USE_VOMS_ATTRIBUTES = False
    NEGOTIATOR_CONSIDER_PREEMPTION = False
    CONDOR_VIEW_HOST = $(COLLECTOR_HOST)
    

  • 03_gwms_local.config
    GSI_DAEMON_TRUSTED_CA_DIR= /etc/grid-security/certificates
    GSI_DAEMON_CERT =  /etc/grid-security/hostcert.pem
    GSI_DAEMON_KEY  =  /etc/grid-security/hostkey.pem
    CERTIFICATE_MAPFILE= /etc/condor/certs/condor_mapfile
    

  • 10_high_availability.config
    CONDOR_HOST=
    CENTRAL_MANAGER1 = test-012.t2.ucsd.edu
    CENTRAL_MANAGER2 = e143.chtc.wisc.edu
    COLLECTOR_HOST = $(CENTRAL_MANAGER1),$(CENTRAL_MANAGER2)
    HAD_PORT = 51450
    HAD_ARGS = -p $(HAD_PORT)
    REPLICATION_PORT = 41450
    REPLICATION_ARGS = -p $(REPLICATION_PORT)
    REPLICATION_LIST = \
                     $(CENTRAL_MANAGER1):$(REPLICATION_PORT), \
                     $(CENTRAL_MANAGER2):$(REPLICATION_PORT)
    HAD_LIST = \
             $(CENTRAL_MANAGER1):$(HAD_PORT), \
             $(CENTRAL_MANAGER2):$(HAD_PORT)
    HAD_CONNECTION_TIMEOUT = 5
    HAD_USE_PRIMARY = true
    HAD         = $(SBIN)/condor_had
    REPLICATION = $(SBIN)/condor_replication
    TRANSFERER  = $(SBIN)/condor_transferd
    DAEMON_LIST = $(DAEMON_LIST), HAD, REPLICATION
    HAD_USE_REPLICATION = true
    STATE_FILE = $(SPOOL)/Accountantnew.log
    REPLICATION_INTERVAL = 300
    MAX_TRANSFERER_LIFETIME = 300
    HAD_UPDATE_INTERVAL = 300
    MASTER_NEGOTIATOR_CONTROLLER    = HAD
    MASTER_HAD_BACKOFF_CONSTANT     = 360
    MAX_HAD_LOG = 640000
    HAD_DEBUG = D_COMMAND
    HAD_LOG = $(LOG)/HADLog
    MAX_REPLICATION_LOG = 640000
    REPLICATION_DEBUG = D_COMMAND
    REPLICATION_LOG = $(LOG)/ReplicationLog
    MAX_TRANSFERER_LOG = 640000
    TRANSFERER_DEBUG = D_COMMAND
    TRANSFERER_LOG = $(LOG)/TransferLog
    HOSTALLOW_ADMINISTRATOR = $(COLLECTOR_HOST)
    HOSTALLOW_NEGOTIATOR = $(COLLECTOR_HOST)
    

  • 11_gwms_secondary_collectors.config
    COLLECTOR9620 = $(COLLECTOR)
    COLLECTOR9620_ENVIRONMENT = "_CONDOR_COLLECTOR_LOG=$(LOG)/Collector9620Log"
    COLLECTOR9620_ARGS = -f -p 9620
    DAEMON_LIST=$(DAEMON_LIST), COLLECTOR9620
    # Similar entries for each subsequent port, up through port 9819 (one per child collector)
    

  • 90_gwms_dns.config
    # Add certs to GSI_DAEMON_NAME with these subjects:
    # /DC=com/DC=DigiCert-Grid/O=Open Science Grid/OU=Services/CN=...
    #   test-010.t2.ucsd.edu, test-008.t2.ucsd.edu, test-003.t2.ucsd.edu,
    #   pilot01/test-008.t2.ucsd.edu, gtesterpilot01/test-001.t2.ucsd.edu,
    #   test-009.t2.ucsd.edu, test-004.t2.ucsd.edu, test-005.t2.ucsd.edu,
    #   test-006.t2.ucsd.edu, test-007.t2.ucsd.edu, cmssrv240.fnal.gov,
    #   test-002.t2.ucsd.edu, cmssrv241.fnal.gov
    

  • 96_scale_tweaks.config
    COLLECTOR_QUERY_WORKERS=16
    CONDOR_VIEW_HOST = 127.0.0.1
    MAX_FILE_DESCRIPTORS=10512
    NEGOTIATOR_MAX_TIME_PER_SUBMITTER=600
    NEGOTIATOR_MAX_TIME_PER_PIESPIN=500
    NEGOTIATOR_TRIM_SHUTDOWN_THRESHOLD=300
    NEGOTIATOR_MAX_TIME_PER_CYCLE=1500
    CLASSAD_LIFETIME=2200
    

Submit Machines

  • System Configurations:
    kernel.pid_max=131072
    /proc/sys/net/core/somaxconn set to 1024
    
    

  • 00_gwms_general.config
    CONDOR_HOST=$(FULL_HOSTNAME)
    UID_DOMAIN=$(FULL_HOSTNAME)
    FILESYSTEM_DOMAIN=$(FULL_HOSTNAME)
    
    LOCK = $(LOG)
    
    DAEMON_LIST   = MASTER
    SEC_DAEMON_SESSION_DURATION = 50000
    
    SEC_DEFAULT_AUTHENTICATION = REQUIRED
    SEC_DEFAULT_AUTHENTICATION_METHODS = FS,GSI
    SEC_READ_AUTHENTICATION    = OPTIONAL
    SEC_CLIENT_AUTHENTICATION  = OPTIONAL
    DENY_WRITE         = anonymous@*
    DENY_ADMINISTRATOR = anonymous@*
    DENY_DAEMON        = anonymous@*
    DENY_NEGOTIATOR    = anonymous@*
    DENY_CLIENT        = anonymous@*
    
    SEC_DEFAULT_ENCRYPTION = OPTIONAL
    SEC_DEFAULT_INTEGRITY = REQUIRED
    SEC_READ_INTEGRITY = OPTIONAL
    SEC_CLIENT_INTEGRITY = OPTIONAL
    SEC_READ_ENCRYPTION = OPTIONAL
    SEC_CLIENT_ENCRYPTION = OPTIONAL
    
    HOSTALLOW_WRITE = *
    ALLOW_WRITE = $(HOSTALLOW_WRITE)
    
  • 02_gwms_schedds.config
    DAEMON_LIST   = $(DAEMON_LIST),  SCHEDD
    MAX_JOBS_RUNNING        = 30000
    JOB_START_DELAY = 2
    JOB_START_COUNT = 50
    JOB_STOP_DELAY = 1
    JOB_STOP_COUNT = 30
    
    MAX_CONCURRENT_UPLOADS = 100
    MAX_CONCURRENT_DOWNLOADS = 100
    
    APPEND_REQ_VANILLA = (Memory>=1) && (Disk>=1)
    JOB_DEFAULT_REQUESTMEMORY=1024
    JOB_DEFAULT_REQUESTDISK=1
    MAXJOBRETIREMENTTIME = $(HOUR) * 24 * 7
    SEC_ENABLE_MATCH_PASSWORD_AUTHENTICATION = TRUE
    SCHEDD_SEND_VACATE_VIA_TCP = True
    STARTD_SENDS_ALIVES = True
    ENABLE_USERLOG_FSYNC = False
    SHADOW.GLEXEC_STARTER = True
    SHADOW.GLEXEC = /bin/false
    MAX_SHADOW_LOG = 100000000
    SPOOL_DIR_STRING="$(SPOOL)"
    LOCAL_SCHEDD_DIR=/var/lib/condor
    SCHEDD_EXPRS = $(SCHEDD_EXPRS) SPOOL_DIR_STRING
    RESERVED_SWAP = 0
    SHADOW.USE_SHARED_PORT = True
    SCHEDD.USE_SHARED_PORT = True
    SHARED_PORT_MAX_WORKERS = 1000
    SHARED_PORT_ARGS = -p 9615
    DAEMON_LIST = $(DAEMON_LIST), SHARED_PORT
    JOB_Site               = "$$(GLIDEIN_Site:Unknown)"
    JOB_GLIDEIN_Entry_Name = "$$(GLIDEIN_Entry_Name:Unknown)"
    JOB_GLIDEIN_Name       = "$$(GLIDEIN_Name:Unknown)"
    JOB_GLIDEIN_Factory    = "$$(GLIDEIN_Factory:Unknown)"
    JOB_GLIDEIN_Schedd     = "$$(GLIDEIN_Schedd:Unknown)"
    JOB_GLIDEIN_ClusterId  = "$$(GLIDEIN_ClusterId:Unknown)"
    JOB_GLIDEIN_ProcId     = "$$(GLIDEIN_ProcId:Unknown)"
    JOB_GLIDEIN_Site       = "$$(GLIDEIN_Site:Unknown)"
    JOB_GLIDEIN_SiteWMS = "$$(GLIDEIN_SiteWMS:Unknown)"
    JOB_GLIDEIN_SiteWMS_Slot = "$$(GLIDEIN_SiteWMS_Slot:Unknown)"
    JOB_GLIDEIN_SiteWMS_JobId = "$$(GLIDEIN_SiteWMS_JobId:Unknown)"
    JOB_GLIDEIN_SiteWMS_Queue = "$$(GLIDEIN_SiteWMS_Queue:Unknown)"
    JOB_GLIDEIN_ATTRS = JOB_Site JOB_GLIDEIN_Entry_Name JOB_GLIDEIN_Name JOB_GLIDEIN_Factory JOB_GLIDEIN_Schedd JOB_GLIDEIN_ClusterId JOB_GLIDEIN_ProcId JOB_GLIDEIN_Site
    JOB_GLIDEIN_SITEWMS_ATTRS = JOB_GLIDEIN_SiteWMS JOB_GLIDEIN_SiteWMS_Slot JOB_GLIDEIN_SiteWMS_JobId JOB_GLIDEIN_SiteWMS_Queue
    JobAdInformationAttrs = $(JOB_GLIDEIN_ATTRS), $(JOB_GLIDEIN_SITEWMS_ATTRS)
    SUBMIT_EXPRS = $(SUBMIT_EXPRS) $(JOB_GLIDEIN_ATTRS) $(JOB_GLIDEIN_SITEWMS_ATTRS) JobAdInformationAttrs
    
  • 03_gwms_local.config
    GSI_DAEMON_TRUSTED_CA_DIR= /etc/grid-security/certificates
    
    GSI_DAEMON_CERT =  /etc/grid-security/hostcert.pem
    GSI_DAEMON_KEY  =  /etc/grid-security/hostkey.pem
    
    CERTIFICATE_MAPFILE= /etc/condor/certs/condor_mapfile
    
  • 10_high_availability.config
    CONDOR_HOST =
    
    CENTRAL_MANAGER1 = test-012.t2.ucsd.edu
    CENTRAL_MANAGER2 = e143.chtc.wisc.edu
    
    COLLECTOR_HOST = $(CENTRAL_MANAGER1),$(CENTRAL_MANAGER2)
    
    HOSTALLOW_ADMINISTRATOR = $(COLLECTOR_HOST)
    HOSTALLOW_NEGOTIATOR = $(COLLECTOR_HOST)
    
  • 95_scalability_tweaks.config
    MAX_DEFAULT_LOG = 100 Mb
    SHADOW_WORKLIFE = 24 * $(HOUR)
    SOCKET_LISTEN_BACKLOG = 1024
    SHADOW=$(SBIN)/condor_shadow_s
    
    CONDOR_Q_USE_V3_PROTOCOL = False
    

These are the submit machines:

  • e141.chtc.wisc.edu
  • e142.chtc.wisc.edu
  • test-003.t2.ucsd.edu
  • test-010.t2.ucsd.edu

Execute Machines