Module: MonitorServers
In: lib/capistrano/ext/monitor.rb
# strftime pattern for a full timestamp (date plus time of day).
LONG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S".freeze
# strftime pattern for a time-of-day-only timestamp.
SHORT_TIME_FORMAT = "%H:%M:%S".freeze
A helper method for encapsulating the behavior of the date/time column in a report.
# File lib/capistrano/ext/monitor.rb, line 10
#
# A helper method for encapsulating the behavior of the date/time column in
# a report.
#
# operation - :init or :show.
#   :init - returns a fresh state Hash with :width (the length of a
#           timestamp rendered with LONG_TIME_FORMAT), :last (nil) and
#           :rows (0).
#   :show - expects that state Hash as the first extra argument. Returns the
#           current time right-justified to state[:width], then records the
#           time in state[:last] and increments state[:rows].
#
# The long (date + time) form is used on every tenth row, whenever the
# calendar date differs from the previous row, and when there is no previous
# row recorded; otherwise only the time of day is shown.
#
# Raises RuntimeError for any other operation.
def date_column(operation, *args)
  case operation
  when :init
    { :width => Time.now.strftime(LONG_TIME_FORMAT).length,
      :last  => nil,
      :rows  => 0 }
  when :show
    state    = args.first
    now      = Time.now
    previous = state[:last]

    # Compare year and day-of-year rather than day-of-month, so that the same
    # day number in a different month or year still counts as a new date.
    # A missing previous timestamp is treated as a date change as well.
    date_changed = previous.nil? ||
                   now.year != previous.year ||
                   now.yday != previous.yday

    pattern = (state[:rows] % 10 == 0 || date_changed) ? LONG_TIME_FORMAT : SHORT_TIME_FORMAT

    state[:last] = now
    state[:rows] += 1
    "%*s" % [state[:width], now.strftime(pattern)]
  else
    raise "unknown operation #{operation.inspect}"
  end
end
A helper method for formatting table headers in a report.
# File lib/capistrano/ext/monitor.rb, line 31
#
# A helper method for formatting table headers in a report.
#
# args - a flat list of (label, width) pairs.
#
# For each pair one cell followed by a single space is written:
# * a label of "-" or " " is a filler and is repeated +width+ times;
# * any other label is printed as-is and, when shorter than +width+,
#   followed by a space and a run of dashes that brings the cell up to
#   +width+ characters. Labels longer than +width+ are not truncated.
#
# The whole line is written to standard output, terminated by a newline.
#
# Raises ArgumentError when the arguments do not form complete pairs,
# instead of failing part-way through the line on a nil width.
def headers(*args)
  unless args.length % 2 == 0
    raise ArgumentError, "headers expects (label, width) pairs, got #{args.length} arguments"
  end

  cells = args.each_slice(2).map do |label, width|
    if label == "-" || label == " "
      label * width
    else
      padding = width - label.length
      gap     = padding > 0 ? " " : ""
      rule    = padding > 1 ? "-" * (padding - 1) : ""
      "#{label}#{gap}#{rule}"
    end
  end

  # every cell is followed by exactly one separating space
  puts cells.map { |cell| cell + " " }.join
end
Monitor the load of the servers tied to the current task.
# File lib/capistrano/ext/monitor.rb, line 59
#
# Monitor the load of the servers tied to the current task.
#
# Repeatedly runs "uptime" on the servers and prints one row per sample: the
# date/time column followed by each server's load averages joined with "/".
# The header is re-printed every 40 rows. The loop ends when the process
# receives SIGINT.
#
# options - Hash; :delay is the number of seconds between samples
#           (default 30, converted with to_i).
#
# Raises RuntimeError if any server writes to its error stream.
def load(options={})
  servers = current_task.servers.sort
  names   = servers.map { |s| s.match(/^([^.]+)/)[1] }
  time    = date_column(:init)
  load_column_width = "0.00".length * 3 + 2

  puts "connecting..."
  connect!

  # Extracts the load figures from a line of uptime output; yields nil
  # (rather than raising) when the line has no recognizable "average" part.
  parser = Proc.new do |text|
    found = text.match(/average.*: (.*)$/)
    found && found[1].split(/, /)
  end
  delay = (options[:delay] || 30).to_i

  running = true
  # remember whatever INT handler was installed so it can be put back
  previous_handler = trap("INT") { running = false; puts "[stopping]" }

  # THE HEADER
  header = Proc.new do
    puts
    headers("-", time[:width], *names.map { |n| [n, load_column_width] }.flatten)
  end

  begin
    while running
      uptimes = {}
      run "uptime" do |ch, stream, data|
        raise "error: #{data}" if stream == :err
        uptimes[ch[:host]] = parser[data.strip]
      end

      # redisplay the header every 40 rows
      header.call if time[:rows] % 40 == 0

      print(date_column(:show, time), " ")
      servers.each do |server|
        loads = uptimes[server]
        # a server with no (or unparseable) output gets a placeholder cell
        # instead of aborting the whole monitor with a nil error
        cell = loads ? loads.join("/") : "?".ljust(load_column_width)
        print(cell, " ")
      end
      puts

      # sleep this way, so that CTRL-C works immediately
      delay.times { sleep 1; break unless running }
    end
  ensure
    trap("INT", previous_handler || "DEFAULT")
  end
end
# File lib/capistrano/ext/monitor.rb, line 226
#
# Reads the file +name+ from the "assets" directory that sits next to this
# source file and hands its contents to +put+ with +to+ as the destination.
def put_asset(name, to)
  asset_path = "#{File.dirname(__FILE__)}/assets/#{name}"
  contents   = File.read(asset_path)
  put(contents, to)
end
Get a value from the remote environment
# File lib/capistrano/ext/monitor.rb, line 49
#
# Get a value from the remote environment.
#
# Runs "echo $<value>" through +run+ with :once => true and returns
# everything received on the non-error stream with the trailing newline
# removed.
#
# value - name of the environment variable; it is interpolated into the
#         shell command unescaped, so it must come from trusted code.
#
# Raises RuntimeError if anything arrives on the error stream.
def remote_env(value)
  # dup the literal: the buffer is appended to below, which must keep working
  # when string literals are frozen
  result = "".dup
  run("echo $#{value}", :once => true) do |_ch, stream, data|
    raise "could not get environment variable #{value}: #{data}" if stream == :err
    result << data
  end
  result.chomp
end
Monitor the number of requests per second being logged on the various servers.
# File lib/capistrano/ext/monitor.rb, line 101
#
# Monitor the number of requests per second being logged on the various
# servers.
#
# logs - paths of the remote log files to tail, optionally followed by an
#        options Hash:
#        :request_pattern - pattern handed to the remote counter script
#                           (default "Completed in [0-9]")
#        :sample_size     - seconds per sample (default 5)
#        :stats           - which columns to show, any of 0 (the current
#                           sample), 1, 5, 15 (rolling minutes)
#                           (default [0, 1, 5, 15])
#        :format          - number format (default "%4.1f")
#
# Uploads a helper script to the servers, tails the logs through it on a
# background thread, and prints one row of per-server and total rates per
# sample until the remote command ends or the process receives SIGINT.
#
# Raises RuntimeError for an unknown entry in :stats (before anything is
# started remotely).
def requests_per_second(*logs)
  # extract our configurable options from the arguments
  options         = logs.last.is_a?(Hash) ? logs.pop : {}
  request_pattern = options[:request_pattern] || "Completed in [0-9]"
  sample_size     = options[:sample_size] || 5
  stats_to_show   = options[:stats] || [0, 1, 5, 15]
  num_format      = options[:format] || "%4.1f"

  # validate the requested statistics up front, so that a bad option fails
  # before a long-running tail has been started on the servers
  stat_columns = stats_to_show.map { |n|
    case n
    when 0 then "#{sample_size} sec"
    when 1 then "1 min"
    when 5 then "5 min"
    when 15 then "15 min"
    else raise "unknown statistic #{n.inspect}"
    end
  }

  # set up the date column formatter, and get the list of servers
  time    = date_column(:init)
  servers = current_task.servers.sort

  # initialize various helper variables we'll be using
  mutex    = Mutex.new
  count    = Hash.new(0)
  running  = false
  channels = {}

  # per-host rolling sample queues, keyed by window length in minutes
  windows = Hash.new { |h, k| h[k] = { 1 => [], 5 => [], 15 => [] } }

  # number of samples each window holds; never less than one, so that a
  # sample size longer than the window cannot produce an empty queue (and a
  # 0/0 average)
  window_capacity = {
    1  => [60 / sample_size, 1].max,
    5  => [300 / sample_size, 1].max,
    15 => [900 / sample_size, 1].max
  }

  # compute the stuff we need to know for displaying the header
  num_len      = (num_format % 1).length
  column_width = num_len * (servers.length + 1) + servers.length
  abbvs        = servers.map { |server| server.match(/^(\w+)/)[1][0, num_len] }
  col_header   = abbvs.map { |v| "%-*s" % [num_len, v] }.join("/")

  header = Proc.new do
    puts
    headers(" ", time[:width], *stat_columns.map { |v| [v, column_width] }.flatten)
    headers("-", time[:width], *([col_header, column_width] * stats_to_show.length))
  end

  # store our helper script on the servers. This script reduces the amount
  # of traffic caused by tailing busy logs across the network, and also reduces
  # the amount of work the client has to do.
  script = "#{remote_env("HOME")}/x-request-counter.rb"
  put_asset "request-counter.rb", script

  # the runner thread accumulates request counts from the servers. It flips
  # +running+ on when it starts and always flips it off when it ends, even
  # if +run+ raises, so the display loop below can never spin forever.
  runner = Thread.new do
    begin
      running = true
      run("echo 0 && tail -F #{logs.join(" ")} | ruby #{script} '#{request_pattern}'") do |ch, stream, out|
        channels[ch[:host]] ||= ch
        if stream == :err
          # report stderr output but keep monitoring, and do not feed the
          # message text into the counters
          puts "#{ch[:host]}: #{out}"
          next
        end
        mutex.synchronize { count[ch[:host]] += out.to_i }
      end
    ensure
      running = false
    end
  end

  # let the runner thread get started (or notice that it already ended)
  sleep 0.01 until running || !runner.alive?

  # trap interrupt for graceful shutdown, remembering the previous handler
  previous_handler = trap("INT") do
    puts "[stopping]"
    channels.values.each { |ch| ch.close; ch[:status] = 0 }
  end

  begin
    while running
      # sleep for the specified sample size (5s by default)
      (sample_size * 2).times { sleep(0.5); break unless running }
      break unless running

      # lock the counters and compute our stats at this point in time
      mutex.synchronize do
        totals = Hash.new { |h, k| h[k] = Hash.new(0) }

        # for each host seen so far -- including hosts that reported nothing
        # during this sample, which count as a sample of zero so that their
        # rolling averages keep being computed...
        (windows.keys | count.keys).each do |host|
          sample = count[host]

          totals[host][0] = sample.to_f / sample_size

          window_capacity.each do |minutes, capacity|
            # push the latest sample onto the tracking queue
            samples = windows[host][minutes].push(sample).last(capacity)
            windows[host][minutes] = samples

            # compute the stat for this host and window
            sum = samples.inject(0) { |acc, n| acc + n }
            totals[host][minutes] = sum / (samples.length * sample_size).to_f
          end

          # add those stats to the totals per category
          [0, 1, 5, 15].each { |stat| totals[:total][stat] += totals[host][stat] }
        end

        # redisplay the header every 40 rows
        header.call if time[:rows] % 40 == 0

        # show the stats
        print(date_column(:show, time))
        stats_to_show.each do |stat|
          print " "
          servers.each { |server| print "#{num_format}/" % totals[server][stat] }
          print(num_format % totals[:total][stat])
        end
        puts

        # reset the sample counter
        count = Hash.new(0)
      end
    end
  ensure
    trap("INT", previous_handler || "DEFAULT")
  end
end
# File lib/capistrano/ext/monitor.rb, line 230
#
# Runs "uptime" on the servers and prints one line per host, grouped by
# role: "[host] up <uptime>, load <loads>", or "[host] <error text>" when
# the host wrote to its error stream or its output could not be parsed.
#
# Role groups are printed in order of role name, separated by a blank line.
# A host is listed under the first role (in +roles+ iteration order) that
# contains it. Hosts that belong to no role are printed as a final group.
# The return value is not meaningful.
def uptime
  results = {}

  puts "querying servers..."
  run "uptime" do |ch, stream, out|
    text = out.strip
    results[ch[:host]] =
      if stream == :err
        { :error => "error: #{text}" }
      elsif (m = text.match(/(\S+)\s+up\s+(.*?),\s+(\d+) users?,\s+load averages?: (.*)/))
        { :uptime => m[2].strip.gsub(/ +/, " "),
          :loads  => m[4],
          :users  => m[3],
          :time   => m[1] }
      else
        { :error => "unknown uptime format: #{text}" }
      end
  end

  # column widths are taken over all hosts so every group lines up
  longest_hostname = results.keys.map { |k| k.length }.max
  longest_uptime   = results.values.map { |v| (v[:uptime] || "").length }.max

  # move each result under its role; whatever stays in +results+ afterwards
  # belongs to no role
  by_role = {}
  roles.each do |name, list|
    by_role[name] = {}
    list.each do |role|
      next unless results[role.host]
      by_role[name][role.host] = results.delete(role.host)
    end
  end

  # the ungrouped hosts are appended explicitly, rather than being filed
  # under a made-up role name that could collide with (or sort before) a
  # real one
  groups = by_role.keys.sort_by { |k| k.to_s }.map { |name| by_role[name] }
  groups << results

  add_newline = false
  groups.each do |group|
    next if group.empty?

    puts "\n" if add_newline
    add_newline = true

    group.keys.sort.each do |server|
      print "[%-*s] " % [longest_hostname, server]
      if group[server][:error]
        puts group[server][:error]
      else
        puts "up %*s, load %s" % [longest_uptime, group[server][:uptime], group[server][:loads]]
      end
    end
  end
end