Skip to content

Commit

Permalink
Add a flag to exclude process args
Browse files Browse the repository at this point in the history
  • Loading branch information
LeoCavaille committed Oct 20, 2014
1 parent 6e77d26 commit 7cf5eb1
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 73 deletions.
135 changes: 70 additions & 65 deletions checks/system/unix.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def check(self, agentConfig):
def parse_df_output(self, df_output, platform_name, inodes=False, use_mount=False, blacklist_re=None):
"""
Parse the output of the df command. If use_volume is true the volume
is used to anchor the metric, otherwise false the mount
is used to anchor the metric, otherwise false the mount
point is used. Returns a tuple of (disk, inode).
"""
usage_data = []
Expand Down Expand Up @@ -166,7 +166,7 @@ def keep_device(device):
if blacklist_re and blacklist_re.match(device[0]):
return False
return True

devices = filter(keep_device, flattened_devices)

return devices
Expand Down Expand Up @@ -217,7 +217,7 @@ def _parse_linux2(self, output):
ioStats[device][headerName] = values[headerIndex]

return ioStats

def _parse_darwin(self, output):
lines = [l.split() for l in output.split("\n") if len(l) > 0]
disks = lines[0]
Expand All @@ -229,7 +229,7 @@ def _parse_darwin(self, output):
'system.io.bytes_per_s': mb_s * 10**6,
}
return io

def xlate(self, metric_name, os_name):
"""Standardize on linux metric names"""
if os_name == "sunos":
Expand Down Expand Up @@ -265,20 +265,20 @@ def check(self, agentConfig):
stdout=sp.PIPE,
close_fds=True).communicate()[0]

# Linux 2.6.32-343-ec2 (ip-10-35-95-10) 12/11/2012 _x86_64_ (2 CPU)
# Linux 2.6.32-343-ec2 (ip-10-35-95-10) 12/11/2012 _x86_64_ (2 CPU)
#
# Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util
# sda1 0.00 17.61 0.26 32.63 4.23 201.04 12.48 0.16 4.81 0.53 1.73
# sdb 0.00 2.68 0.19 3.84 5.79 26.07 15.82 0.02 4.93 0.22 0.09
# sdg 0.00 0.13 2.29 3.84 100.53 30.61 42.78 0.05 8.41 0.88 0.54
# sdf 0.00 0.13 2.30 3.84 100.54 30.61 42.78 0.06 9.12 0.90 0.55
# md0 0.00 0.00 0.05 3.37 1.41 30.01 18.35 0.00 0.00 0.00 0.00
# Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util
# sda1 0.00 17.61 0.26 32.63 4.23 201.04 12.48 0.16 4.81 0.53 1.73
# sdb 0.00 2.68 0.19 3.84 5.79 26.07 15.82 0.02 4.93 0.22 0.09
# sdg 0.00 0.13 2.29 3.84 100.53 30.61 42.78 0.05 8.41 0.88 0.54
# sdf 0.00 0.13 2.30 3.84 100.54 30.61 42.78 0.06 9.12 0.90 0.55
# md0 0.00 0.00 0.05 3.37 1.41 30.01 18.35 0.00 0.00 0.00 0.00
#
# Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util
# sda1 0.00 0.00 0.00 10.89 0.00 43.56 8.00 0.03 2.73 2.73 2.97
# sdb 0.00 0.00 0.00 2.97 0.00 11.88 8.00 0.00 0.00 0.00 0.00
# sdg 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
# sdf 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
# Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util
# sda1 0.00 0.00 0.00 10.89 0.00 43.56 8.00 0.03 2.73 2.73 2.97
# sdb 0.00 0.00 0.00 2.97 0.00 11.88 8.00 0.00 0.00 0.00 0.00
# sdg 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
# sdf 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
# md0 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
io.update(self._parse_linux2(stdout))

Expand All @@ -296,12 +296,12 @@ def check(self, agentConfig):
# device r/s w/s kr/s kw/s wait actv svc_t %w %b
# ramdisk1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0
# sd0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0
# sd1 0.0 139.0 0.0 1850.6 0.0 0.0 0.1 0 1
# sd1 0.0 139.0 0.0 1850.6 0.0 0.0 0.1 0 1

# discard the first half of the display (stats since boot)
lines = [l for l in iostat.split("\n") if len(l) > 0]
lines = lines[len(lines)/2:]

assert "extended device statistics" in lines[0]
headers = lines[1].split()
assert "device" in headers
Expand All @@ -312,25 +312,25 @@ def check(self, agentConfig):
io[cols[0]] = {}
for i in range(1, len(cols)):
io[cols[0]][self.xlate(headers[i], "sunos")] = cols[i]

elif sys.platform.startswith("freebsd"):
iostat = sp.Popen(["iostat", "-x", "-d", "1", "2"],
stdout=sp.PIPE,
close_fds=True).communicate()[0]

# Be careful!
# Be careful!
# It looks like SunOS, but some columms (wait, svc_t) have different meaning
# extended device statistics
# device r/s w/s kr/s kw/s wait svc_t %b
# extended device statistics
# device r/s w/s kr/s kw/s wait svc_t %b
# ad0 3.1 1.3 49.9 18.8 0 0.7 0
# extended device statistics
# device r/s w/s kr/s kw/s wait svc_t %b
# extended device statistics
# device r/s w/s kr/s kw/s wait svc_t %b
# ad0 0.0 2.0 0.0 31.8 0 0.2 0

# discard the first half of the display (stats since boot)
lines = [l for l in iostat.split("\n") if len(l) > 0]
lines = lines[len(lines)/2:]

assert "extended device statistics" in lines[0]
headers = lines[1].split()
assert "device" in headers
Expand All @@ -342,12 +342,12 @@ def check(self, agentConfig):
for i in range(1, len(cols)):
io[cols[0]][self.xlate(headers[i], "freebsd")] = cols[i]
elif sys.platform == 'darwin':
iostat = sp.Popen(['iostat', '-d', '-c', '2', '-w', '1'],
iostat = sp.Popen(['iostat', '-d', '-c', '2', '-w', '1'],
stdout=sp.PIPE,
close_fds=True).communicate()[0]
# disk0 disk1 <-- number of disks
# KB/t tps MB/s KB/t tps MB/s
# 21.11 23 0.47 20.01 0 0.00
# KB/t tps MB/s KB/t tps MB/s
# 21.11 23 0.47 20.01 0 0.00
# 6.67 3 0.02 0.00 0 0.00 <-- line of interest
io = self._parse_darwin(iostat)
else:
Expand All @@ -371,7 +371,7 @@ def check(self, agentConfig):


class Load(Check):

def check(self, agentConfig):
if Platform.is_linux():
try:
Expand All @@ -381,9 +381,9 @@ def check(self, agentConfig):
except Exception:
self.logger.exception('Cannot extract load')
return False

uptime = uptime[0] # readlines() provides a list but we want a string

elif sys.platform in ('darwin', 'sunos5') or sys.platform.startswith("freebsd"):
# Get output from uptime
try:
Expand All @@ -393,7 +393,7 @@ def check(self, agentConfig):
except Exception:
self.logger.exception('Cannot extract load')
return False

# Split out the 3 load average values
load = [res.replace(',', '.') for res in re.findall(r'([0-9]+[\.,]\d+)', uptime)]
# Normalize load by number of cores
Expand Down Expand Up @@ -421,7 +421,7 @@ def __init__(self, logger):
if sys.platform == 'darwin':
macV = platform.mac_ver()
macV_minor_version = int(re.match(r'10\.(\d+)\.?.*', macV[0]).group(1))

# Output from top is slightly modified on OS X 10.6 (case #28239) and greater
if macV and (macV_minor_version >= 6):
self.topIndex = 6
Expand All @@ -438,7 +438,7 @@ def __init__(self, logger):
except Exception:
# No page size available
pass

def check(self, agentConfig):
if Platform.is_linux():
try:
Expand All @@ -448,7 +448,7 @@ def check(self, agentConfig):
except Exception:
self.logger.exception('Cannot get memory metrics from /proc/meminfo')
return False

# $ cat /proc/meminfo
# MemTotal: 7995360 kB
# MemFree: 1045120 kB
Expand Down Expand Up @@ -491,7 +491,7 @@ def check(self, agentConfig):
# Hugepagesize: 2048 kB
# DirectMap4k: 10112 kB
# DirectMap2M: 8243200 kB

regexp = re.compile(r'^(\w+):\s+([0-9]+)') # We run this several times so one-time compile now
meminfo = {}

Expand All @@ -502,9 +502,9 @@ def check(self, agentConfig):
meminfo[match.group(1)] = match.group(2)
except Exception:
self.logger.exception("Cannot parse /proc/meminfo")

memData = {}

# Physical memory
# FIXME units are in MB, we should use bytes instead
try:
Expand All @@ -522,22 +522,22 @@ def check(self, agentConfig):
memData['physPctUsable'] = float(memData['physUsable']) / float(memData['physTotal'])
except Exception:
self.logger.exception('Cannot compute stats from /proc/meminfo')

# Swap
# FIXME units are in MB, we should use bytes instead
try:
memData['swapTotal'] = int(meminfo.get('SwapTotal', 0)) / 1024
memData['swapFree'] = int(meminfo.get('SwapFree', 0)) / 1024

memData['swapUsed'] = memData['swapTotal'] - memData['swapFree']

if memData['swapTotal'] > 0:
memData['swapPctFree'] = float(memData['swapFree']) / float(memData['swapTotal'])
except Exception:
self.logger.exception('Cannot compute swap stats')
return memData

return memData

elif sys.platform == 'darwin':
macV = platform.mac_ver()
macV_minor_version = int(re.match(r'10\.(\d+)\.?.*', macV[0]).group(1))
Expand All @@ -548,11 +548,11 @@ def check(self, agentConfig):
except StandardError:
self.logger.exception('getMemoryUsage')
return False

# Deal with top
lines = top.split('\n')
physParts = re.findall(r'([0-9]\d+)', lines[self.topIndex])

# Deal with sysctl
swapParts = re.findall(r'([0-9]+\.\d+)', sysctl)

Expand All @@ -564,7 +564,7 @@ def check(self, agentConfig):
physFreePartIndex = 2

return {'physUsed' : physParts[physUsedPartIndex], 'physFree' : physParts[physFreePartIndex], 'swapUsed' : swapParts[1], 'swapFree' : swapParts[2]}

elif sys.platform.startswith("freebsd"):
try:
sysctl = sp.Popen(['sysctl', 'vm.stats.vm'], stdout=sp.PIPE, close_fds=True).communicate()[0]
Expand Down Expand Up @@ -649,7 +649,7 @@ def check(self, agentConfig):
memData['swapUsed'] += int(line[2])
except Exception:
self.logger.exception('Cannot compute stats from swapinfo')

return memData;
elif sys.platform == 'sunos5':
try:
Expand All @@ -675,7 +675,7 @@ def check(self, agentConfig):
# memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:swap 91828224 <--
# memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:swapcap 1073741824 <--
# memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:zonename 53aa9b7e-48ba-4152-a52b-a6368c3d9e7c

# turn memory_cap:360:zone_name:key value
# into { "key": value, ...}
kv = [l.strip().split() for l in kmem.split("\n") if len(l) > 0]
Expand All @@ -701,29 +701,34 @@ def check(self, agentConfig):
class Processes(Check):

def check(self, agentConfig):
process_exclude_args = agentConfig.get('exclude_process_args', False)
if process_exclude_args:
ps_arg = 'aux'
else:
ps_arg = 'auxww'
# Get output from ps
try:
ps = sp.Popen(['ps', 'auxww'], stdout=sp.PIPE, close_fds=True).communicate()[0]
ps = sp.Popen(['ps', ps_arg], stdout=sp.PIPE, close_fds=True).communicate()[0]
except StandardError:
self.logger.exception('getProcesses')
return False

# Split out each process
processLines = ps.split('\n')

del processLines[0] # Removes the headers
processLines.pop() # Removes a trailing empty line

processes = []

for line in processLines:
line = line.split(None, 10)
processes.append(map(lambda s: s.strip(), line))

return { 'processes': processes,
'apiKey': agentConfig['api_key'],
'host': get_hostname(agentConfig) }

class Cpu(Check):

def check(self, agentConfig):
Expand All @@ -737,7 +742,7 @@ def format_results(us, sy, wa, idle, st):
del data[key]
return data


def get_value(legend, data, name, filter_value=None):
"Using the legend and a metric name, get the value or None from the data line"
if name in legend:
Expand Down Expand Up @@ -780,12 +785,12 @@ def get_value(legend, data, name, filter_value=None):
data = avg[0].split()

# Userland
# Debian lenny says %user so we look for both
# Debian lenny says %user so we look for both
# One of them will be 0
cpu_metrics = {
"%usr":None, "%user":None, "%nice":None,
"%usr":None, "%user":None, "%nice":None,
"%iowait":None, "%idle":None, "%sys":None,
"%irq":None, "%soft":None, "%steal":None,
"%irq":None, "%soft":None, "%steal":None,
}

for cpu_m in cpu_metrics:
Expand All @@ -802,15 +807,15 @@ def get_value(legend, data, name, filter_value=None):

return format_results(cpu_user,
cpu_system,
cpu_wait,
cpu_wait,
cpu_idle,
cpu_stolen)
else:
return False

elif sys.platform == 'darwin':
# generate 3 seconds of data
# [' disk0 disk1 cpu load average', ' KB/t tps MB/s KB/t tps MB/s us sy id 1m 5m 15m', ' 21.23 13 0.27 17.85 7 0.13 14 7 79 1.04 1.27 1.31', ' 4.00 3 0.01 5.00 8 0.04 12 10 78 1.04 1.27 1.31', '']
# [' disk0 disk1 cpu load average', ' KB/t tps MB/s KB/t tps MB/s us sy id 1m 5m 15m', ' 21.23 13 0.27 17.85 7 0.13 14 7 79 1.04 1.27 1.31', ' 4.00 3 0.01 5.00 8 0.04 12 10 78 1.04 1.27 1.31', '']
iostats = sp.Popen(['iostat', '-C', '-w', '3', '-c', '2'], stdout=sp.PIPE, close_fds=True).communicate()[0]
lines = [l for l in iostats.split("\n") if len(l) > 0]
legend = [l for l in lines if "us" in l]
Expand Down Expand Up @@ -915,7 +920,7 @@ def _get_subprocess_output(command, log):
import time
import pprint
import re

logging.basicConfig(level=logging.DEBUG, format='%(asctime)-15s %(message)s')
log = logging.getLogger()
cpu = Cpu(log)
Expand Down
4 changes: 4 additions & 0 deletions datadog.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ use_mount: no
# If enabled the collector will capture a metric for check run times.
# check_timings: no

# If you want to remove the 'ww' flag from ps catching the arguments of processes
# for instance for security reasons
# exclude_process_args: no

# ========================================================================== #
# DogStatsd configuration #
# ========================================================================== #
Expand Down
Loading

0 comments on commit 7cf5eb1

Please sign in to comment.