diff --git a/curvefs/docker/debian9/base/Dockerfile b/curvefs/docker/debian9/base/Dockerfile index 9fb1a4283d..78ee19ea9d 100644 --- a/curvefs/docker/debian9/base/Dockerfile +++ b/curvefs/docker/debian9/base/Dockerfile @@ -1,4 +1,4 @@ -FROM debian:9 +FROM opencurvedocker/curve-base:debian9 RUN echo "deb http://mirrors.163.com/debian/ stretch main\n" \ "deb http://mirrors.163.com/debian/ stretch-updates main non-free contrib\n" \ @@ -27,6 +27,9 @@ RUN echo "deb http://mirrors.163.com/debian/ stretch main\n" \ zlib1g-dev \ libsnappy-dev \ liblz4-dev \ + python \ + wget \ + python3 \ && wget https://curve-build.nos-eastchina1.126.net/curve-base.tar.gz \ && tar -xzvf curve-base.tar.gz COPY fusermount3 /usr/local/bin diff --git a/curvefs/docker/debian9/entrypoint.sh b/curvefs/docker/debian9/entrypoint.sh index f0d82ae547..e95e09ed6d 100755 --- a/curvefs/docker/debian9/entrypoint.sh +++ b/curvefs/docker/debian9/entrypoint.sh @@ -87,6 +87,10 @@ function prepare() { g_binary="$g_prefix/sbin/curve-fuse" g_start_args="--confPath $conf_path" ;; + monitor) + g_binary="python3" + g_start_args="target_json.py" + ;; *) usage exit 1 @@ -117,6 +121,9 @@ function main() { [[ $(command -v crontab) ]] && cron if [ $g_role == "etcd" ]; then exec $g_binary $g_start_args >>$g_prefix/logs/etcd.log 2>&1 + elif [ $g_role == "monitor" ]; then + cd $g_prefix + exec $g_binary $g_start_args else exec $g_binary $g_start_args fi diff --git a/curvefs/monitor/README.md b/curvefs/monitor/README.md index 409607e862..0145ede829 100644 --- a/curvefs/monitor/README.md +++ b/curvefs/monitor/README.md @@ -32,13 +32,11 @@ monitor ## 使用说明 -以下步骤为不使用puppet进行部署的过程。 - ### 环境初始化 1.部署监控系统的机器需要安装如下组件: -node_exporter、docker、docker-compose、jq +docker、docker-compose、jq * docker安装 @@ -67,19 +65,6 @@ apt-get install docker-ce-cli apt-get install docker-compose ``` -* node_exporter - - 可能很多节点都要安装,可以用脚本来一起装,如下面的方式: - - ``` - for i in {1..4}; - do - scp -P 1046 ~/Downloads/node_exporter-0.18.1.linux-amd64.tar.gz 
yangyaokai@pubt1-curve$i.yq.163.org:~/ - ssh -p 1046 yangyaokai@pubt1-curve$i.yq.163.org "tar zxvf node_exporter-0.18.1.linux-amd64.tar.gz ; cd node_exporter-0.18.1.linux-amd64 ; nohup ./node_exporter >/dev/null 2>log &" - echo $i - done - ``` - * jq update_dashboard.sh脚本需要依赖jq命令,这个一般机器上都没装 @@ -88,9 +73,6 @@ apt-get install docker-ce-cli apt-get install jq ``` -2.chunkserver上安装node_exporter(机器监控可以依赖哨兵,可以不装) - - ### 部署监控系统 * 修改相关配置 @@ -107,25 +89,20 @@ apt-get install docker-ce-cli ```curve-monitor.sh start ``` -* 部署grafana每日报表 +# grafana每日报表 -crontab配置定时任务,添加如下任务: -30 8 * * * python /etc/curve/monitor/grafana-report.py >> /etc/curve/monitor/cron.log 2>&1 -如果机器上没有配置其他的定时任务,可直接用下面命令 -echo "30 8 * * * python /etc/curve/monitor/grafana-report.py >> /etc/curve/monitor/cron.log 2>&1" >> conf && crontab conf && rm -f conf - -#### 对接puppet - -如果对接puppet,配置相关文件都会放到puppet上,配置的变更都要上传到puppet上。 - -puppet上管理的配置包括:docker-compose.yml、target.ini、grafana.ini、prometheus.yml - -通过安装包安装完curve-monitor以后,会将curve-monitor.sh拷贝到/usr/bin目录下,可以通过以下命令管理监控系统: +每日报表需要设置定时任务,通过 grafana-report.py 来发送邮件。 -启动:```curve-monitor.sh start``` +请修改 grafana-report.py 文件(13~25行)中的内容(包括发件人,收件人,用户名和密码等)。 -停止:```curve-monitor.sh stop``` +此外 grafana-report.py 的运行需要依赖一些第三方库,请参照文件内容安装相关库。 -重启:```curve-monitor.sh restart``` +```bash +sudo apt install python-pip +pip install email +``` -上面环境初始化中的依赖的包puppet基本都会帮忙安装,除了node_exporter需要自己安装。 +crontab配置定时任务,添加如下任务: +30 8 * * * python /etc/curve/monitor/grafana-report.py >> /etc/curve/monitor/cron.log 2>&1 +如果机器上没有配置其他的定时任务,可直接用下面命令 +echo "30 8 * * * python /etc/curve/monitor/grafana-report.py >> /etc/curve/monitor/cron.log 2>&1" >> conf && crontab conf && rm -f conf diff --git a/curvefs/monitor/curve-monitor.sh b/curvefs/monitor/curve-monitor.sh index 8a7aa3950f..7e2a9c9752 100644 --- a/curvefs/monitor/curve-monitor.sh +++ b/curvefs/monitor/curve-monitor.sh @@ -19,9 +19,6 @@ start() { echo "" > monitor.log - stdbuf -oL python3 target_json.py >> monitor.log 2>&1 & - echo 
"start prometheus targets service success!" - docker-compose up >> monitor.log 2>&1 & echo "start metric system success!" } diff --git a/curvefs/monitor/docker-compose.yml b/curvefs/monitor/docker-compose.yml index 050c660835..c9a5a5a079 100644 --- a/curvefs/monitor/docker-compose.yml +++ b/curvefs/monitor/docker-compose.yml @@ -3,7 +3,7 @@ version: '2.0' services: prometheus: - image: prom/prometheus:v2.28.1 + image: prom/prometheus:latest volumes: - ./prometheus/:/etc/prometheus/:rw - ./prometheus/data:/prometheus:rw @@ -18,13 +18,14 @@ services: network_mode: host grafana: - image: grafana/grafana:8.0.6 + image: grafana/grafana depends_on: - prometheus network_mode: host volumes: - ./grafana/data:/var/lib/grafana:rw - ./grafana/grafana.ini:/etc/grafana/grafana.ini:rw + - ./grafana/provisioning:/etc/grafana/provisioning:rw environment: - GF_INSTALL_PLUGINS=grafana-piechart-panel - GF_SECURITY_ADMIN_USER=admin diff --git a/curvefs/monitor/grafana-report.py b/curvefs/monitor/grafana-report.py index a400263e8c..016473a509 100644 --- a/curvefs/monitor/grafana-report.py +++ b/curvefs/monitor/grafana-report.py @@ -15,9 +15,9 @@ username = 'xxxxxxxxx@163.com' password = 'xxxxxxxxx' # SMTP授权码 smtpserver = 'xxxx.163.com:1234' -sourcefile= '/etc/curve/monitor/grafana/report/report.tex' -imagedir= '/etc/curve/monitor/grafana/report/images/' -pdfpath= '/etc/curve/monitor/grafana/report/report.pdf' +sourcefile= '/etc/curvefs/monitor/grafana/report/report.tex' +imagedir= '/etc/curvefs/monitor/grafana/report/images/' +pdfpath= '/etc/curvefs/monitor/grafana/report/report.pdf' clustername = '【CURVE】xxxxxxxxx' grafanauri = '127.0.0.1:3000' reporteruri = '127.0.0.1:8686' diff --git a/curvefs/monitor/target_json.py b/curvefs/monitor/target_json.py index fc58a50c00..3b36e2e617 100644 --- a/curvefs/monitor/target_json.py +++ b/curvefs/monitor/target_json.py @@ -39,23 +39,27 @@ def loadServer(): if ret == 0: with open(JSON_PATH) as load_f: data = json.load(load_f) - servers = [] + 
metaservers = [] if data is not None: for pool in data["poollist"]: for zone in pool["zonelist"]: for server in zone["serverlist"]: - servers.append(server) - return servers + for metaserver in server["metaserverlist"]: + metaservers.append(metaserver) + return metaservers def loadClient(): ret, output = runCurvefsToolCommand(["list-fs"]) clients = [] + label = lablesValue(None, "client") if ret == 0 : - data = json.loads(output.decode()) + try: + data = json.loads(output.decode()) + except json.decoder.JSONDecodeError: + return unitValue(label, clients) for fsinfo in data["fsInfo"]: for mountpoint in fsinfo["mountpoints"]: clients.append(mountpoint["hostname"] + ":" + str(mountpoint["port"])) - label = lablesValue(None, "client") return unitValue(label, clients) def loadType(hostType): @@ -70,9 +74,10 @@ def ipPort2Addr(ip, port): return str(ip) + ":" + str(port) def server2Target(server): - labels = lablesValue(server["hostname"], "metaserver") + hostname = server["hostname"] + "." + str(server["metaserverid"]) + labels = lablesValue(hostname, "metaserver") serverAddr = [] - serverAddr.append(ipPort2Addr(server["internalip"], server["internalport"])) + serverAddr.append(ipPort2Addr(server["externalip"], server["externalport"])) targets = list(set(serverAddr)) return unitValue(labels, targets) diff --git a/curvefs/src/tools/list/curvefs_fsinfo_list.cpp b/curvefs/src/tools/list/curvefs_fsinfo_list.cpp index 0df30a8c39..01d42f1d88 100644 --- a/curvefs/src/tools/list/curvefs_fsinfo_list.cpp +++ b/curvefs/src/tools/list/curvefs_fsinfo_list.cpp @@ -75,6 +75,7 @@ bool FsInfoListTool::AfterSendRequestToHost(const std::string& host) { } else if (show_) { if (response_->fsinfo().empty()) { std::cout << "no fs in cluster." 
<< std::endl; + return true; } std::string output; diff --git a/curvefs/util/image.sh b/curvefs/util/image.sh index 41b41dc2ba..67691562c3 100644 --- a/curvefs/util/image.sh +++ b/curvefs/util/image.sh @@ -20,6 +20,7 @@ prefix="$(pwd)/docker/$2/curvefs" mkdir -p $prefix $prefix/conf make install prefix="$prefix" make install prefix="$prefix" only=etcd +make install prefix="$prefix" only=monitor for file in `ls conf`; do dsv="=" diff --git a/curvefs/util/install.sh b/curvefs/util/install.sh index be8531d50d..f6560d6ffd 100644 --- a/curvefs/util/install.sh +++ b/curvefs/util/install.sh @@ -234,6 +234,21 @@ install_etcd() { success "install $project_name success\n" } +install_monitor() { + local project_name="monitor" + g_project_name=$project_name + + local project_prefix="$g_prefix/monitor" + local dst="monitor" + mkdir -p $project_prefix + mkdir -p "$project_prefix/prometheus" + mkdir -p "$project_prefix/data" + copy_file "$dst/target_json.py" "$project_prefix" + copy_file "$dst/target.ini" "$project_prefix" + + success "install $project_name success\n" +} + main() { get_options "$@" get_build_mode @@ -243,6 +258,8 @@ main() { exit 1 elif [ "$g_only" == "etcd" ]; then install_etcd + elif [ "$g_only" == "monitor" ]; then + install_monitor else install_curvefs fi diff --git a/docker/debian9/base/Dockerfile b/docker/debian9/base/Dockerfile index f33e3eff3c..67e2f52228 100644 --- a/docker/debian9/base/Dockerfile +++ b/docker/debian9/base/Dockerfile @@ -1,4 +1,4 @@ -FROM debian:9 +FROM opencurvedocker/curve-base:debian9 RUN echo "deb http://mirrors.163.com/debian/ stretch main\n" \ "deb http://mirrors.163.com/debian/ stretch-updates main non-free contrib\n" \ @@ -27,6 +27,9 @@ RUN echo "deb http://mirrors.163.com/debian/ stretch main\n" \ zlib1g-dev \ libsnappy-dev \ liblz4-dev \ + python \ + wget \ + python3 \ && wget https://curve-build.nos-eastchina1.126.net/curve-base.tar.gz \ && tar -xzvf curve-base.tar.gz diff --git a/docker/debian9/entrypoint.sh 
b/docker/debian9/entrypoint.sh index 30c7bba688..cb150b4b86 100644 --- a/docker/debian9/entrypoint.sh +++ b/docker/debian9/entrypoint.sh @@ -93,6 +93,10 @@ function prepare() { g_binary="$g_prefix/sbin/nebd-server" g_start_args="-confPath=$g_prefix/conf/nebd-server.conf -log_dir=$g_prefix/logs" ;; + monitor) + g_binary="python" + g_start_args="target_json.py" + ;; *) usage exit 1 @@ -122,6 +126,9 @@ function main() { [[ ! -z $g_preexec ]] && $g_preexec if [ $g_role == "etcd" ]; then exec $g_binary $g_start_args >>$g_prefix/logs/etcd.log 2>&1 + elif [ $g_role == "monitor" ]; then + cd $g_prefix + exec $g_binary $g_start_args else exec $g_binary $g_start_args fi diff --git a/monitor/README.md b/monitor/README.md index 27d81ef1be..8a81987d3f 100644 --- a/monitor/README.md +++ b/monitor/README.md @@ -57,13 +57,11 @@ prometheus的配置文件 ## 使用说明 -以下步骤为不使用puppet进行部署的过程。 - #### 环境初始化 1.部署监控系统的机器需要安装如下组件: -node_exporter、docker、docker-compose、jq +docker、docker-compose、jq * docker安装 @@ -92,19 +90,6 @@ apt-get install docker-ce-cli apt-get install docker-compose ``` -* node_exporter - - 可能很多节点都要安装,可以用脚本来一起装,如下面的方式: - - ``` - for i in {1..4}; - do - scp -P 1046 ~/Downloads/node_exporter-0.18.1.linux-amd64.tar.gz yangyaokai@pubt1-curve$i.yq.163.org:~/ - ssh -p 1046 yangyaokai@pubt1-curve$i.yq.163.org "tar zxvf node_exporter-0.18.1.linux-amd64.tar.gz ; cd node_exporter-0.18.1.linux-amd64 ; nohup ./node_exporter >/dev/null 2>log &" - echo $i - done - ``` - * jq update_dashboard.sh脚本需要依赖jq命令,这个一般机器上都没装 @@ -113,9 +98,6 @@ apt-get install docker-ce-cli apt-get install jq ``` -2.chunkserver上安装node_exporter(机器监控可以依赖哨兵,可以不装) - - #### 部署监控系统 * 修改相关配置 @@ -139,21 +121,3 @@ crontab配置定时任务,添加如下任务: 如果机器上没有配置其他的定时任务,可直接用下面命令 echo "30 8 * * * python /etc/curve/monitor/report.py >> /etc/curve/monitor/cron.log 2>&1" >> conf && crontab conf && rm -f conf - - - -#### 对接puppet - -如果对接puppet,配置相关文件都会放到puppet上,配置的变更都要上传到puppet上。 - -puppet上管理的配置包括:docker-compose.yml、target.ini、grafana.ini、prometheus.yml - 
-通过安装包安装完curve-monitor以后,会将curve-monitor.sh拷贝到/usr/bin目录下,可以通过以下命令管理监控系统: - -启动:```curve-monitor.sh start``` - -停止:```curve-monitor.sh stop``` - -重启:```curve-monitor.sh restart``` - -上面环境初始化中的依赖的包puppet基本都会帮忙安装,除了node_exporter需要自己安装。 diff --git a/monitor/curve-monitor.sh b/monitor/curve-monitor.sh index 75894bb70a..3eb4781449 100644 --- a/monitor/curve-monitor.sh +++ b/monitor/curve-monitor.sh @@ -19,9 +19,6 @@ start() { echo "" > monitor.log - stdbuf -oL python target_json.py >> monitor.log 2>&1 & - echo "start prometheus targets service success!" - docker-compose up >> monitor.log 2>&1 & echo "start metric system success!" } diff --git a/monitor/target_json.py b/monitor/target_json.py index 98e39aae96..81b91f4a96 100644 --- a/monitor/target_json.py +++ b/monitor/target_json.py @@ -61,12 +61,6 @@ def refresh(): 'targets': [chunkserverip[i]+':'+chunkserverport[i]], }) - # add node_exporter targets - targets.append({ - 'labels': {'job': "node_exporter"}, - 'targets': [t+':9100' for t in chunkserverip], - }) - # get client's ip and port curve_ops_tool_res = commands.getstatusoutput("curve_ops_tool client-list -listClientInRepo=true") if curve_ops_tool_res[0] != 0: diff --git a/util/image.sh b/util/image.sh index 508dae2072..2b7d526e73 100644 --- a/util/image.sh +++ b/util/image.sh @@ -19,6 +19,7 @@ prefix="$(pwd)/docker/$2/curvebs" mkdir -p $prefix $prefix/conf make install prefix="$prefix" make install prefix="$prefix" only=etcd +make install prefix="$prefix" only=monitor for path in `ls conf/* nebd/etc/nebd/*`; do dir=`dirname $path` diff --git a/util/install.sh b/util/install.sh index 990be7fada..a57e8ffd60 100644 --- a/util/install.sh +++ b/util/install.sh @@ -223,6 +223,21 @@ install_etcd() { success "install $project_name success\n" } +install_monitor() { + local project_name="monitor" + g_project_name=$project_name + + local project_prefix="$g_prefix/monitor" + local dst="monitor" + mkdir -p $project_prefix + mkdir -p "$project_prefix/prometheus" + mkdir -p 
"$project_prefix/data" + copy_file "$dst/target_json.py" "$project_prefix" + copy_file "$dst/target.ini" "$project_prefix" + + success "install $project_name success\n" +} + main() { get_options "$@" @@ -231,6 +246,8 @@ main() { exit 1 elif [ "$g_only" == "etcd" ]; then install_etcd + elif [ "$g_only" == "monitor" ]; then + install_monitor else install_curvebs fi