diff --git a/checks/system/unix.py b/checks/system/unix.py index 398787c14c..8cdbf2e8c9 100644 --- a/checks/system/unix.py +++ b/checks/system/unix.py @@ -57,7 +57,7 @@ def check(self, agentConfig): def parse_df_output(self, df_output, platform_name, inodes=False, use_mount=False, blacklist_re=None): """ Parse the output of the df command. If use_volume is true the volume - is used to anchor the metric, otherwise false the mount + is used to anchor the metric, otherwise false the mount point is used. Returns a tuple of (disk, inode). """ usage_data = [] @@ -166,7 +166,7 @@ def keep_device(device): if blacklist_re and blacklist_re.match(device[0]): return False return True - + devices = filter(keep_device, flattened_devices) return devices @@ -217,7 +217,7 @@ def _parse_linux2(self, output): ioStats[device][headerName] = values[headerIndex] return ioStats - + def _parse_darwin(self, output): lines = [l.split() for l in output.split("\n") if len(l) > 0] disks = lines[0] @@ -229,7 +229,7 @@ def _parse_darwin(self, output): 'system.io.bytes_per_s': mb_s * 10**6, } return io - + def xlate(self, metric_name, os_name): """Standardize on linux metric names""" if os_name == "sunos": @@ -265,20 +265,20 @@ def check(self, agentConfig): stdout=sp.PIPE, close_fds=True).communicate()[0] - # Linux 2.6.32-343-ec2 (ip-10-35-95-10) 12/11/2012 _x86_64_ (2 CPU) + # Linux 2.6.32-343-ec2 (ip-10-35-95-10) 12/11/2012 _x86_64_ (2 CPU) # - # Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util - # sda1 0.00 17.61 0.26 32.63 4.23 201.04 12.48 0.16 4.81 0.53 1.73 - # sdb 0.00 2.68 0.19 3.84 5.79 26.07 15.82 0.02 4.93 0.22 0.09 - # sdg 0.00 0.13 2.29 3.84 100.53 30.61 42.78 0.05 8.41 0.88 0.54 - # sdf 0.00 0.13 2.30 3.84 100.54 30.61 42.78 0.06 9.12 0.90 0.55 - # md0 0.00 0.00 0.05 3.37 1.41 30.01 18.35 0.00 0.00 0.00 0.00 + # Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util + # sda1 0.00 17.61 0.26 32.63 4.23 201.04 12.48 0.16 4.81 0.53 
1.73 + # sdb 0.00 2.68 0.19 3.84 5.79 26.07 15.82 0.02 4.93 0.22 0.09 + # sdg 0.00 0.13 2.29 3.84 100.53 30.61 42.78 0.05 8.41 0.88 0.54 + # sdf 0.00 0.13 2.30 3.84 100.54 30.61 42.78 0.06 9.12 0.90 0.55 + # md0 0.00 0.00 0.05 3.37 1.41 30.01 18.35 0.00 0.00 0.00 0.00 # - # Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util - # sda1 0.00 0.00 0.00 10.89 0.00 43.56 8.00 0.03 2.73 2.73 2.97 - # sdb 0.00 0.00 0.00 2.97 0.00 11.88 8.00 0.00 0.00 0.00 0.00 - # sdg 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - # sdf 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + # Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util + # sda1 0.00 0.00 0.00 10.89 0.00 43.56 8.00 0.03 2.73 2.73 2.97 + # sdb 0.00 0.00 0.00 2.97 0.00 11.88 8.00 0.00 0.00 0.00 0.00 + # sdg 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + # sdf 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 # md0 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 io.update(self._parse_linux2(stdout)) @@ -296,12 +296,12 @@ def check(self, agentConfig): # device r/s w/s kr/s kw/s wait actv svc_t %w %b # ramdisk1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 # sd0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 - # sd1 0.0 139.0 0.0 1850.6 0.0 0.0 0.1 0 1 - + # sd1 0.0 139.0 0.0 1850.6 0.0 0.0 0.1 0 1 + # discard the first half of the display (stats since boot) lines = [l for l in iostat.split("\n") if len(l) > 0] lines = lines[len(lines)/2:] - + assert "extended device statistics" in lines[0] headers = lines[1].split() assert "device" in headers @@ -312,25 +312,25 @@ def check(self, agentConfig): io[cols[0]] = {} for i in range(1, len(cols)): io[cols[0]][self.xlate(headers[i], "sunos")] = cols[i] - + elif sys.platform.startswith("freebsd"): iostat = sp.Popen(["iostat", "-x", "-d", "1", "2"], stdout=sp.PIPE, close_fds=True).communicate()[0] - # Be careful! + # Be careful! 
# It looks like SunOS, but some columms (wait, svc_t) have different meaning - # extended device statistics - # device r/s w/s kr/s kw/s wait svc_t %b + # extended device statistics + # device r/s w/s kr/s kw/s wait svc_t %b # ad0 3.1 1.3 49.9 18.8 0 0.7 0 - # extended device statistics - # device r/s w/s kr/s kw/s wait svc_t %b + # extended device statistics + # device r/s w/s kr/s kw/s wait svc_t %b # ad0 0.0 2.0 0.0 31.8 0 0.2 0 - + # discard the first half of the display (stats since boot) lines = [l for l in iostat.split("\n") if len(l) > 0] lines = lines[len(lines)/2:] - + assert "extended device statistics" in lines[0] headers = lines[1].split() assert "device" in headers @@ -342,12 +342,12 @@ def check(self, agentConfig): for i in range(1, len(cols)): io[cols[0]][self.xlate(headers[i], "freebsd")] = cols[i] elif sys.platform == 'darwin': - iostat = sp.Popen(['iostat', '-d', '-c', '2', '-w', '1'], + iostat = sp.Popen(['iostat', '-d', '-c', '2', '-w', '1'], stdout=sp.PIPE, close_fds=True).communicate()[0] # disk0 disk1 <-- number of disks - # KB/t tps MB/s KB/t tps MB/s - # 21.11 23 0.47 20.01 0 0.00 + # KB/t tps MB/s KB/t tps MB/s + # 21.11 23 0.47 20.01 0 0.00 # 6.67 3 0.02 0.00 0 0.00 <-- line of interest io = self._parse_darwin(iostat) else: @@ -371,7 +371,7 @@ def check(self, agentConfig): class Load(Check): - + def check(self, agentConfig): if Platform.is_linux(): try: @@ -381,9 +381,9 @@ def check(self, agentConfig): except Exception: self.logger.exception('Cannot extract load') return False - + uptime = uptime[0] # readlines() provides a list but we want a string - + elif sys.platform in ('darwin', 'sunos5') or sys.platform.startswith("freebsd"): # Get output from uptime try: @@ -393,7 +393,7 @@ def check(self, agentConfig): except Exception: self.logger.exception('Cannot extract load') return False - + # Split out the 3 load average values load = [res.replace(',', '.') for res in re.findall(r'([0-9]+[\.,]\d+)', uptime)] # Normalize load by number of 
cores @@ -421,7 +421,7 @@ def __init__(self, logger): if sys.platform == 'darwin': macV = platform.mac_ver() macV_minor_version = int(re.match(r'10\.(\d+)\.?.*', macV[0]).group(1)) - + # Output from top is slightly modified on OS X 10.6 (case #28239) and greater if macV and (macV_minor_version >= 6): self.topIndex = 6 @@ -438,7 +438,7 @@ def __init__(self, logger): except Exception: # No page size available pass - + def check(self, agentConfig): if Platform.is_linux(): try: @@ -448,7 +448,7 @@ def check(self, agentConfig): except Exception: self.logger.exception('Cannot get memory metrics from /proc/meminfo') return False - + # $ cat /proc/meminfo # MemTotal: 7995360 kB # MemFree: 1045120 kB @@ -491,7 +491,7 @@ def check(self, agentConfig): # Hugepagesize: 2048 kB # DirectMap4k: 10112 kB # DirectMap2M: 8243200 kB - + regexp = re.compile(r'^(\w+):\s+([0-9]+)') # We run this several times so one-time compile now meminfo = {} @@ -502,9 +502,9 @@ def check(self, agentConfig): meminfo[match.group(1)] = match.group(2) except Exception: self.logger.exception("Cannot parse /proc/meminfo") - + memData = {} - + # Physical memory # FIXME units are in MB, we should use bytes instead try: @@ -522,7 +522,7 @@ def check(self, agentConfig): memData['physPctUsable'] = float(memData['physUsable']) / float(memData['physTotal']) except Exception: self.logger.exception('Cannot compute stats from /proc/meminfo') - + # Swap # FIXME units are in MB, we should use bytes instead try: @@ -530,14 +530,14 @@ def check(self, agentConfig): memData['swapFree'] = int(meminfo.get('SwapFree', 0)) / 1024 memData['swapUsed'] = memData['swapTotal'] - memData['swapFree'] - + if memData['swapTotal'] > 0: memData['swapPctFree'] = float(memData['swapFree']) / float(memData['swapTotal']) except Exception: self.logger.exception('Cannot compute swap stats') - - return memData - + + return memData + elif sys.platform == 'darwin': macV = platform.mac_ver() macV_minor_version = int(re.match(r'10\.(\d+)\.?.*', 
macV[0]).group(1)) @@ -548,11 +548,11 @@ def check(self, agentConfig): except StandardError: self.logger.exception('getMemoryUsage') return False - + # Deal with top lines = top.split('\n') physParts = re.findall(r'([0-9]\d+)', lines[self.topIndex]) - + # Deal with sysctl swapParts = re.findall(r'([0-9]+\.\d+)', sysctl) @@ -564,7 +564,7 @@ def check(self, agentConfig): physFreePartIndex = 2 return {'physUsed' : physParts[physUsedPartIndex], 'physFree' : physParts[physFreePartIndex], 'swapUsed' : swapParts[1], 'swapFree' : swapParts[2]} - + elif sys.platform.startswith("freebsd"): try: sysctl = sp.Popen(['sysctl', 'vm.stats.vm'], stdout=sp.PIPE, close_fds=True).communicate()[0] @@ -649,7 +649,7 @@ def check(self, agentConfig): memData['swapUsed'] += int(line[2]) except Exception: self.logger.exception('Cannot compute stats from swapinfo') - + return memData; elif sys.platform == 'sunos5': try: @@ -675,7 +675,7 @@ def check(self, agentConfig): # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:swap 91828224 <-- # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:swapcap 1073741824 <-- # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:zonename 53aa9b7e-48ba-4152-a52b-a6368c3d9e7c - + # turn memory_cap:360:zone_name:key value # into { "key": value, ...} kv = [l.strip().split() for l in kmem.split("\n") if len(l) > 0] @@ -701,29 +701,34 @@ def check(self, agentConfig): class Processes(Check): def check(self, agentConfig): + process_exclude_args = agentConfig.get('exclude_process_args', False) + if process_exclude_args: + ps_arg = 'aux' + else: + ps_arg = 'auxww' # Get output from ps try: - ps = sp.Popen(['ps', 'auxww'], stdout=sp.PIPE, close_fds=True).communicate()[0] + ps = sp.Popen(['ps', ps_arg], stdout=sp.PIPE, close_fds=True).communicate()[0] except StandardError: self.logger.exception('getProcesses') return False - + # Split out each process processLines = ps.split('\n') - + del processLines[0] # Removes the headers processLines.pop() # Removes a trailing empty line - + 
processes = [] - + for line in processLines: line = line.split(None, 10) processes.append(map(lambda s: s.strip(), line)) - + return { 'processes': processes, 'apiKey': agentConfig['api_key'], 'host': get_hostname(agentConfig) } - + class Cpu(Check): def check(self, agentConfig): @@ -737,7 +742,7 @@ def format_results(us, sy, wa, idle, st): del data[key] return data - + def get_value(legend, data, name, filter_value=None): "Using the legend and a metric name, get the value or None from the data line" if name in legend: @@ -780,12 +785,12 @@ def get_value(legend, data, name, filter_value=None): data = avg[0].split() # Userland - # Debian lenny says %user so we look for both + # Debian lenny says %user so we look for both # One of them will be 0 cpu_metrics = { - "%usr":None, "%user":None, "%nice":None, + "%usr":None, "%user":None, "%nice":None, "%iowait":None, "%idle":None, "%sys":None, - "%irq":None, "%soft":None, "%steal":None, + "%irq":None, "%soft":None, "%steal":None, } for cpu_m in cpu_metrics: @@ -802,15 +807,15 @@ def get_value(legend, data, name, filter_value=None): return format_results(cpu_user, cpu_system, - cpu_wait, + cpu_wait, cpu_idle, cpu_stolen) else: return False - + elif sys.platform == 'darwin': # generate 3 seconds of data - # [' disk0 disk1 cpu load average', ' KB/t tps MB/s KB/t tps MB/s us sy id 1m 5m 15m', ' 21.23 13 0.27 17.85 7 0.13 14 7 79 1.04 1.27 1.31', ' 4.00 3 0.01 5.00 8 0.04 12 10 78 1.04 1.27 1.31', ''] + # [' disk0 disk1 cpu load average', ' KB/t tps MB/s KB/t tps MB/s us sy id 1m 5m 15m', ' 21.23 13 0.27 17.85 7 0.13 14 7 79 1.04 1.27 1.31', ' 4.00 3 0.01 5.00 8 0.04 12 10 78 1.04 1.27 1.31', ''] iostats = sp.Popen(['iostat', '-C', '-w', '3', '-c', '2'], stdout=sp.PIPE, close_fds=True).communicate()[0] lines = [l for l in iostats.split("\n") if len(l) > 0] legend = [l for l in lines if "us" in l] @@ -915,7 +920,7 @@ def _get_subprocess_output(command, log): import time import pprint import re - + 
logging.basicConfig(level=logging.DEBUG, format='%(asctime)-15s %(message)s') log = logging.getLogger() cpu = Cpu(log) diff --git a/datadog.conf.example b/datadog.conf.example index 940f97981d..34e7160b0a 100644 --- a/datadog.conf.example +++ b/datadog.conf.example @@ -72,6 +72,10 @@ use_mount: no # If enabled the collector will capture a metric for check run times. # check_timings: no +# Set to yes to run ps without the 'ww' flag, which captures the full arguments +# of processes; you may want this for instance for security reasons. +# exclude_process_args: no + # ========================================================================== # # DogStatsd configuration # # ========================================================================== # diff --git a/resources/processes.py b/resources/processes.py index 05942e49c7..fa2d115567 100644 --- a/resources/processes.py +++ b/resources/processes.py @@ -26,24 +26,29 @@ def describe_snapshot(self): def _get_proc_list(self): # Get output from ps try: - ps = subprocess.Popen(['ps', 'auxww'], stdout=subprocess.PIPE, close_fds=True).communicate()[0] + process_exclude_args = self.config.get('exclude_process_args', False) + if process_exclude_args: + ps_arg = 'aux' + else: + ps_arg = 'auxww' + ps = subprocess.Popen(['ps', ps_arg], stdout=subprocess.PIPE, close_fds=True).communicate()[0] except Exception, e: self.log.exception('Cannot get process list') return False - + # Split out each process processLines = ps.split('\n') - + del processLines[0] # Removes the headers processLines.pop() # Removes a trailing empty line - + processes = [] - + for line in processLines: line = line.split(None, 10) processes.append(map(lambda s: s.strip(), line)) - + return processes + return processes @staticmethod def group_by_family(o): @@ -61,7 +66,7 @@ def _compute_family(command): return 'kernel' else: return (command.split()[0]).split('/')[-1] - + PSLine = namedtuple("PSLine","user,pid,pct_cpu,pct_mem,vsz,rss,tty,stat,started,time,command") self.start_snapshot()