Module MonitorServers
In: lib/capistrano/ext/monitor.rb
lib/capistrano/ext/monitor.rb

Methods

Constants

# strftime pattern for a full timestamp (date and time) in the report's
# date column.
LONG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S".freeze
# strftime pattern for a time-only timestamp in the report's date column.
SHORT_TIME_FORMAT = "%H:%M:%S".freeze

Public Instance methods

A helper method for encapsulating the behavior of the date/time column in a report.

[Source]

    # File lib/capistrano/ext/monitor.rb, line 10
# Encapsulates the behavior of the date/time column in a report.
#
# operation - :init builds and returns a fresh state hash with the keys
#             :width (length of a long-format timestamp), :last (time of the
#             previous row, initially nil) and :rows (rows shown so far).
#             :show formats the current time for the next row; the state
#             hash is passed as the first extra argument and is updated in
#             place.
#
# Returns the state hash for :init, or the timestamp right-justified to
# state[:width] for :show.
# Raises RuntimeError for any other operation.
def date_column(operation, *args)
  case operation
  when :init
    { :width => Time.now.strftime(LONG_TIME_FORMAT).length,
      :last => nil,
      :rows => 0 }
  when :show
    state = args.first
    now   = Time.now
    last  = state[:last]

    # The long format is used on every 10th row, when there is no previous
    # timestamp to compare against, and whenever the calendar date changed.
    # Year and day-of-year are compared (rather than day-of-month alone) so
    # that dates sharing a day number in different months still count as a
    # change.
    date_changed = last.nil? || now.year != last.year || now.yday != last.yday
    pattern = (state[:rows] % 10 == 0 || date_changed) ? LONG_TIME_FORMAT : SHORT_TIME_FORMAT

    state[:last] = now
    state[:rows] += 1
    "%*s" % [state[:width], now.strftime(pattern)]
  else
    raise "unknown operation #{operation.inspect}"
  end
end

A helper method for encapsulating the behavior of the date/time column in a report.

[Source]

    # File lib/capistrano/ext/monitor.rb, line 10
# Encapsulates the behavior of the date/time column in a report.
#
# operation - :init builds and returns a fresh state hash with the keys
#             :width (length of a long-format timestamp), :last (time of the
#             previous row, initially nil) and :rows (rows shown so far).
#             :show formats the current time for the next row; the state
#             hash is passed as the first extra argument and is updated in
#             place.
#
# Returns the state hash for :init, or the timestamp right-justified to
# state[:width] for :show.
# Raises RuntimeError for any other operation.
def date_column(operation, *args)
  case operation
  when :init
    { :width => Time.now.strftime(LONG_TIME_FORMAT).length,
      :last => nil,
      :rows => 0 }
  when :show
    state = args.first
    now   = Time.now
    last  = state[:last]

    # The long format is used on every 10th row, when there is no previous
    # timestamp to compare against, and whenever the calendar date changed.
    # Year and day-of-year are compared (rather than day-of-month alone) so
    # that dates sharing a day number in different months still count as a
    # change.
    date_changed = last.nil? || now.year != last.year || now.yday != last.yday
    pattern = (state[:rows] % 10 == 0 || date_changed) ? LONG_TIME_FORMAT : SHORT_TIME_FORMAT

    state[:last] = now
    state[:rows] += 1
    "%*s" % [state[:width], now.strftime(pattern)]
  else
    raise "unknown operation #{operation.inspect}"
  end
end

A helper method for formatting table headers in a report.

[Source]

    # File lib/capistrano/ext/monitor.rb, line 31
# Prints one row of table headers for a report to standard output.
#
# args is a flat list of (header, size) pairs. A header of "-" or " " is a
# filler and is repeated +size+ times. Any other header is printed as-is;
# when it is shorter than +size+ it is followed by one space and a run of
# dashes that brings the column up to +size+ characters (longer headers are
# not truncated). Every column is followed by two spaces and the row ends
# with a newline.
def headers(*args)
  args.each_slice(2) do |label, width|
    if label == "-" || label == " "
      # filler column: repeat the character across the whole width
      print(label * width, "  ")
      next
    end

    print(label)
    remaining = width - label.length
    if remaining > 0
      print(" ")
      print("-" * (remaining - 1)) if remaining > 1
    end
    print("  ")
  end
  puts
end

A helper method for formatting table headers in a report.

[Source]

    # File lib/capistrano/ext/monitor.rb, line 31
# Prints one row of table headers for a report to standard output.
#
# args is a flat list of (header, size) pairs. A header of "-" or " " is a
# filler and is repeated +size+ times. Any other header is printed as-is;
# when it is shorter than +size+ it is followed by one space and a run of
# dashes that brings the column up to +size+ characters (longer headers are
# not truncated). Every column is followed by two spaces and the row ends
# with a newline.
def headers(*args)
  args.each_slice(2) do |label, width|
    if label == "-" || label == " "
      # filler column: repeat the character across the whole width
      print(label * width, "  ")
      next
    end

    print(label)
    remaining = width - label.length
    if remaining > 0
      print(" ")
      print("-" * (remaining - 1)) if remaining > 1
    end
    print("  ")
  end
  puts
end

Monitor the load of the servers tied to the current task.

[Source]

    # File lib/capistrano/ext/monitor.rb, line 59
# Monitors the load of the servers tied to the current task.
#
# After calling connect!, repeatedly runs "uptime" on the servers and prints
# one row per sample: the date column followed by each server's load
# figures joined with "/". The header row (server names up to the first
# ".") is printed again every 40 rows. Sampling continues until the process
# receives INT (CTRL-C).
#
# options - Hash; :delay is the number of seconds to wait between samples
#           (converted with to_i, default 30).
#
# Raises RuntimeError if a server writes to its error stream.
def load(options={})
  hosts       = current_task.servers.sort
  short_names = hosts.map { |host| host.match(/^([^.]+)/)[1] }
  clock       = date_column(:init)
  load_width  = "0.00".length * 3 + 2

  puts "connecting..."
  connect!

  # pulls the comma-separated load figures out of a line of uptime output
  extract_loads = lambda { |text| text.match(/average.*: (.*)$/)[1].split(/, /) }
  interval = (options[:delay] || 30).to_i

  running = true
  trap("INT") { running = false; puts "[stopping]" }

  print_header = lambda do
    puts
    columns = short_names.map { |name| [name, load_width] }.flatten
    headers("-", clock[:width], *columns)
  end

  while running
    samples = {}
    run "uptime" do |channel, stream, data|
      raise "error: #{data}" if stream == :err
      samples[channel[:host]] = extract_loads[data.strip]
    end

    # redisplay the header every 40 rows
    print_header.call if clock[:rows] % 40 == 0

    print(date_column(:show, clock), "  ")
    hosts.each { |host| print(samples[host].join("/"), "  ") }
    puts

    # sleep one second at a time, so that CTRL-C takes effect promptly
    interval.times do
      sleep 1
      break unless running
    end
  end
end

Monitor the load of the servers tied to the current task.

[Source]

    # File lib/capistrano/ext/monitor.rb, line 59
# Monitors the load of the servers tied to the current task.
#
# After calling connect!, repeatedly runs "uptime" on the servers and prints
# one row per sample: the date column followed by each server's load
# figures joined with "/". The header row (server names up to the first
# ".") is printed again every 40 rows. Sampling continues until the process
# receives INT (CTRL-C).
#
# options - Hash; :delay is the number of seconds to wait between samples
#           (converted with to_i, default 30).
#
# Raises RuntimeError if a server writes to its error stream.
def load(options={})
  hosts       = current_task.servers.sort
  short_names = hosts.map { |host| host.match(/^([^.]+)/)[1] }
  clock       = date_column(:init)
  load_width  = "0.00".length * 3 + 2

  puts "connecting..."
  connect!

  # pulls the comma-separated load figures out of a line of uptime output
  extract_loads = lambda { |text| text.match(/average.*: (.*)$/)[1].split(/, /) }
  interval = (options[:delay] || 30).to_i

  running = true
  trap("INT") { running = false; puts "[stopping]" }

  print_header = lambda do
    puts
    columns = short_names.map { |name| [name, load_width] }.flatten
    headers("-", clock[:width], *columns)
  end

  while running
    samples = {}
    run "uptime" do |channel, stream, data|
      raise "error: #{data}" if stream == :err
      samples[channel[:host]] = extract_loads[data.strip]
    end

    # redisplay the header every 40 rows
    print_header.call if clock[:rows] % 40 == 0

    print(date_column(:show, clock), "  ")
    hosts.each { |host| print(samples[host].join("/"), "  ") }
    puts

    # sleep one second at a time, so that CTRL-C takes effect promptly
    interval.times do
      sleep 1
      break unless running
    end
  end
end

[Source]

     # File lib/capistrano/ext/monitor.rb, line 226
# Reads the file +name+ from the "assets" directory located next to this
# source file and passes its contents to +put+, with +to+ as the second
# argument.
def put_asset(name, to)
  asset_path = "#{File.dirname(__FILE__)}/assets/#{name}"
  contents   = File.read(asset_path)
  put(contents, to)
end

[Source]

     # File lib/capistrano/ext/monitor.rb, line 226
# Reads the file +name+ from the "assets" directory located next to this
# source file and passes its contents to +put+, with +to+ as the second
# argument.
def put_asset(name, to)
  asset_path = "#{File.dirname(__FILE__)}/assets/#{name}"
  contents   = File.read(asset_path)
  put(contents, to)
end

Get a value from the remote environment

[Source]

    # File lib/capistrano/ext/monitor.rb, line 49
# Gets a value from the remote environment.
#
# Runs "echo $<value>" through +run+ with :once => true and accumulates
# everything the command writes to its output stream.
#
# value - name of the environment variable, without the leading "$".
#
# Returns the collected output with its trailing line terminator removed.
# Raises RuntimeError if anything arrives on the error stream.
def remote_env(value)
  collected = ""
  run("echo $#{value}", :once => true) do |_channel, stream, data|
    if stream == :err
      raise "could not get environment variable #{value}: #{data}"
    end
    collected << data
  end
  collected.chomp
end

Get a value from the remote environment

[Source]

    # File lib/capistrano/ext/monitor.rb, line 49
# Gets a value from the remote environment.
#
# Runs "echo $<value>" through +run+ with :once => true and accumulates
# everything the command writes to its output stream.
#
# value - name of the environment variable, without the leading "$".
#
# Returns the collected output with its trailing line terminator removed.
# Raises RuntimeError if anything arrives on the error stream.
def remote_env(value)
  collected = ""
  run("echo $#{value}", :once => true) do |_channel, stream, data|
    if stream == :err
      raise "could not get environment variable #{value}: #{data}"
    end
    collected << data
  end
  collected.chomp
end

Monitor the number of requests per second being logged on the various servers.

[Source]

     # File lib/capistrano/ext/monitor.rb, line 101
# Monitors the number of requests per second being logged on the servers
# tied to the current task.
#
# logs - paths of the remote log files to tail. A trailing Hash is taken as
#        options:
#        :request_pattern - pattern handed to the remote counter script
#                           (default "Completed in [0-9]")
#        :sample_size     - whole number of seconds per sample (default 5)
#        :stats           - columns to show; any of 0 (the latest sample),
#                           1, 5 and 15 (minute averages)
#                           (default [0, 1, 5, 15])
#        :format          - format string applied to every number
#                           (default "%4.1f")
#
# Uploads the request-counter.rb asset to x-request-counter.rb under the
# remote $HOME, pipes `tail -F` of the logs through it, and prints one row
# of per-server and total rates per sample. The loop ends when the remote
# command finishes; INT closes the channels.
#
# Raises RuntimeError if :stats contains an unknown statistic.
def requests_per_second(*logs)
  # extract our configurable options from the arguments
  options = logs.last.is_a?(Hash) ? logs.pop : {}
  request_pattern = options[:request_pattern] || "Completed in [0-9]"
  sample_size = options[:sample_size] || 5
  stats_to_show = options[:stats] || [0, 1, 5, 15]
  num_format = options[:format] || "%4.1f"

  # set up the date column formatter, and get the list of servers
  time = date_column(:init)
  servers = current_task.servers.sort

  # initialize various helper variables we'll be using
  mutex = Mutex.new
  count = Hash.new(0)
  running = false
  channels = {}

  # per-server queues of recent samples, keyed by window length in minutes
  windows = Hash.new { |h,k|
    h[k] = {
      1  => [], # last 1 minute
      5  => [], # last 5 minutes
      15 => []  # last 15 minutes
    }
  }

  # how many samples each window holds. Always keep at least one sample, so
  # that a sample size longer than the window cannot produce an empty window
  # (and a 0 / 0.0 => NaN average).
  window_sizes = {
    1  => [60 / sample_size, 1].max,
    5  => [300 / sample_size, 1].max,
    15 => [900 / sample_size, 1].max
  }

  # store our helper script on the servers. This script reduces the amount
  # of traffic caused by tailing busy logs across the network, and also reduces
  # the amount of work the client has to do.
  script = "#{remote_env("HOME")}/x-request-counter.rb"
  put_asset "request-counter.rb", script

  # start the runner thread, which accumulates request counts from the
  # servers. It is started directly: a Thread.stop/wakeup handshake can lose
  # the wakeup when it is sent before the thread has actually stopped, which
  # would leave the thread asleep forever.
  runner = Thread.new do
    running = true
    run("echo 0 && tail -F #{logs.join(" ")} | ruby #{script} '#{request_pattern}'") do |ch, stream, out|
      channels[ch[:host]] ||= ch
      if stream == :err
        # report error output, but never count it as requests. Monitoring
        # continues afterwards (`puts ... and break` could never break,
        # because puts returns nil).
        puts "#{ch[:host]}: #{out}"
        next
      end
      mutex.synchronize { count[ch[:host]] += out.to_i }
    end
    running = false
  end

  # let the runner thread get started; stop waiting if it has already ended
  sleep 0.01 until running || !runner.alive?

  # trap interrupt for graceful shutdown
  trap("INT") { puts "[stopping]"; channels.values.each { |ch| ch.close; ch[:status] = 0 } }

  # compute the stuff we need to know for displaying the header
  num_len = (num_format % 1).length
  column_width = num_len * (servers.length + 1) + servers.length
  abbvs = servers.map { |server| server.match(/^(\w+)/)[1][0,num_len] }
  col_header = abbvs.map { |v| "%-*s" % [num_len, v] }.join("/")

  # titles for the first header row
  stat_columns = stats_to_show.map { |n|
      case n
      when 0 then "#{sample_size} sec"
      when 1 then "1 min"
      when 5 then "5 min"
      when 15 then "15 min"
      else raise "unknown statistic #{n.inspect}"
      end
    }

  # writes both rows of the header
  header = Proc.new do
    puts
    headers(" ", time[:width], *stat_columns.map { |v| [v, column_width] }.flatten)
    headers("-", time[:width], *([col_header, column_width] * stats_to_show.length))
  end

  while running
    # sleep for the specified sample size (5s by default)
    (sample_size * 2).times { sleep(0.5); break unless running }
    break unless running

    # lock the counters and compute our stats at this point in time
    mutex.synchronize do
      totals = Hash.new { |h,k| h[k] = Hash.new(0) }

      # for each server that reported during this sample...
      count.each do |k,c|
        # rate over the latest sample
        totals[k][0] = c.to_f / sample_size

        window_sizes.each do |minutes, max_samples|
          # push the latest sample onto the tracking queue, dropping the oldest
          samples = windows[k][minutes].push(c).last(max_samples)
          windows[k][minutes] = samples

          # average rate over the samples currently in the window
          totals[k][minutes] = samples.inject(0) { |n,i| n + i } / (samples.length * sample_size).to_f
        end

        # add those stats to the totals per category
        [0, 1, 5, 15].each { |stat| totals[:total][stat] += totals[k][stat] }
      end

      # redisplay the header every 40 rows
      header.call if time[:rows] % 40 == 0

      # show the stats
      print(date_column(:show, time))
      stats_to_show.each do |stat|
        print "  "
        servers.each { |server| print "#{num_format}/" % totals[server][stat] }
        print(num_format % totals[:total][stat])
      end
      puts

      # reset the sample counter
      count = Hash.new(0)
    end
  end
end

Monitor the number of requests per second being logged on the various servers.

[Source]

     # File lib/capistrano/ext/monitor.rb, line 101
# Monitors the number of requests per second being logged on the servers
# tied to the current task.
#
# logs - paths of the remote log files to tail. A trailing Hash is taken as
#        options:
#        :request_pattern - pattern handed to the remote counter script
#                           (default "Completed in [0-9]")
#        :sample_size     - whole number of seconds per sample (default 5)
#        :stats           - columns to show; any of 0 (the latest sample),
#                           1, 5 and 15 (minute averages)
#                           (default [0, 1, 5, 15])
#        :format          - format string applied to every number
#                           (default "%4.1f")
#
# Uploads the request-counter.rb asset to x-request-counter.rb under the
# remote $HOME, pipes `tail -F` of the logs through it, and prints one row
# of per-server and total rates per sample. The loop ends when the remote
# command finishes; INT closes the channels.
#
# Raises RuntimeError if :stats contains an unknown statistic.
def requests_per_second(*logs)
  # extract our configurable options from the arguments
  options = logs.last.is_a?(Hash) ? logs.pop : {}
  request_pattern = options[:request_pattern] || "Completed in [0-9]"
  sample_size = options[:sample_size] || 5
  stats_to_show = options[:stats] || [0, 1, 5, 15]
  num_format = options[:format] || "%4.1f"

  # set up the date column formatter, and get the list of servers
  time = date_column(:init)
  servers = current_task.servers.sort

  # initialize various helper variables we'll be using
  mutex = Mutex.new
  count = Hash.new(0)
  running = false
  channels = {}

  # per-server queues of recent samples, keyed by window length in minutes
  windows = Hash.new { |h,k|
    h[k] = {
      1  => [], # last 1 minute
      5  => [], # last 5 minutes
      15 => []  # last 15 minutes
    }
  }

  # how many samples each window holds. Always keep at least one sample, so
  # that a sample size longer than the window cannot produce an empty window
  # (and a 0 / 0.0 => NaN average).
  window_sizes = {
    1  => [60 / sample_size, 1].max,
    5  => [300 / sample_size, 1].max,
    15 => [900 / sample_size, 1].max
  }

  # store our helper script on the servers. This script reduces the amount
  # of traffic caused by tailing busy logs across the network, and also reduces
  # the amount of work the client has to do.
  script = "#{remote_env("HOME")}/x-request-counter.rb"
  put_asset "request-counter.rb", script

  # start the runner thread, which accumulates request counts from the
  # servers. It is started directly: a Thread.stop/wakeup handshake can lose
  # the wakeup when it is sent before the thread has actually stopped, which
  # would leave the thread asleep forever.
  runner = Thread.new do
    running = true
    run("echo 0 && tail -F #{logs.join(" ")} | ruby #{script} '#{request_pattern}'") do |ch, stream, out|
      channels[ch[:host]] ||= ch
      if stream == :err
        # report error output, but never count it as requests. Monitoring
        # continues afterwards (`puts ... and break` could never break,
        # because puts returns nil).
        puts "#{ch[:host]}: #{out}"
        next
      end
      mutex.synchronize { count[ch[:host]] += out.to_i }
    end
    running = false
  end

  # let the runner thread get started; stop waiting if it has already ended
  sleep 0.01 until running || !runner.alive?

  # trap interrupt for graceful shutdown
  trap("INT") { puts "[stopping]"; channels.values.each { |ch| ch.close; ch[:status] = 0 } }

  # compute the stuff we need to know for displaying the header
  num_len = (num_format % 1).length
  column_width = num_len * (servers.length + 1) + servers.length
  abbvs = servers.map { |server| server.match(/^(\w+)/)[1][0,num_len] }
  col_header = abbvs.map { |v| "%-*s" % [num_len, v] }.join("/")

  # titles for the first header row
  stat_columns = stats_to_show.map { |n|
      case n
      when 0 then "#{sample_size} sec"
      when 1 then "1 min"
      when 5 then "5 min"
      when 15 then "15 min"
      else raise "unknown statistic #{n.inspect}"
      end
    }

  # writes both rows of the header
  header = Proc.new do
    puts
    headers(" ", time[:width], *stat_columns.map { |v| [v, column_width] }.flatten)
    headers("-", time[:width], *([col_header, column_width] * stats_to_show.length))
  end

  while running
    # sleep for the specified sample size (5s by default)
    (sample_size * 2).times { sleep(0.5); break unless running }
    break unless running

    # lock the counters and compute our stats at this point in time
    mutex.synchronize do
      totals = Hash.new { |h,k| h[k] = Hash.new(0) }

      # for each server that reported during this sample...
      count.each do |k,c|
        # rate over the latest sample
        totals[k][0] = c.to_f / sample_size

        window_sizes.each do |minutes, max_samples|
          # push the latest sample onto the tracking queue, dropping the oldest
          samples = windows[k][minutes].push(c).last(max_samples)
          windows[k][minutes] = samples

          # average rate over the samples currently in the window
          totals[k][minutes] = samples.inject(0) { |n,i| n + i } / (samples.length * sample_size).to_f
        end

        # add those stats to the totals per category
        [0, 1, 5, 15].each { |stat| totals[:total][stat] += totals[k][stat] }
      end

      # redisplay the header every 40 rows
      header.call if time[:rows] % 40 == 0

      # show the stats
      print(date_column(:show, time))
      stats_to_show.each do |stat|
        print "  "
        servers.each { |server| print "#{num_format}/" % totals[server][stat] }
        print(num_format % totals[:total][stat])
      end
      puts

      # reset the sample counter
      count = Hash.new(0)
    end
  end
end

[Source]

     # File lib/capistrano/ext/monitor.rb, line 230
# Runs "uptime" on the servers and prints a report grouped by role.
#
# Each server's output is parsed into its uptime and load figures; error
# stream output and unparseable output are recorded as error messages
# instead. Servers are listed alphabetically within each role, roles are
# ordered by name, groups are separated by a blank line, and servers that
# belong to no role are listed under the :zzz key (presumably so that they
# sort last).
def uptime
  reports = {}

  puts "querying servers..."
  run "uptime" do |channel, stream, out|
    text = out.strip
    reports[channel[:host]] =
      if stream == :err
        { :error => "error: #{text}" }
      elsif (parsed = text.match(/(\S+)\s+up\s+(.*?),\s+(\d+) users?,\s+load averages?: (.*)/))
        # captures: 1 = current time, 2 = uptime, 3 = user count, 4 = loads
        { :uptime => parsed[2].strip.gsub(/  +/, " "),
          :loads  => parsed[4],
          :users  => parsed[3],
          :time   => parsed[1] }
      else
        { :error => "unknown uptime format: #{text}" }
      end
  end

  # column widths are measured over every server before grouping
  host_width   = reports.keys.map { |host| host.length }.max
  uptime_width = reports.values.map { |report| (report[:uptime] || "").length }.max

  # move each report under the first role that lists its host
  grouped = {}
  roles.each do |role_name, members|
    grouped[role_name] = {}
    members.each do |member|
      next unless reports[member.host]
      grouped[role_name][member.host] = reports.delete(member.host)
    end
  end

  # whatever is left belongs to no role
  grouped[:zzz] = reports unless reports.empty?

  first_group = true
  grouped.keys.sort_by { |key| key.to_s }.each do |role_name|
    entries = grouped[role_name]
    next if entries.empty?

    puts "\n" unless first_group
    first_group = false

    entries.keys.sort.each do |host|
      entry = entries[host]
      print "[%-*s] " % [host_width, host]
      if entry[:error]
        puts entry[:error]
      else
        puts "up %*s, load %s" % [uptime_width, entry[:uptime], entry[:loads]]
      end
    end
  end
end

[Source]

     # File lib/capistrano/ext/monitor.rb, line 230
# Runs "uptime" on the servers and prints a report grouped by role.
#
# Each server's output is parsed into its uptime and load figures; error
# stream output and unparseable output are recorded as error messages
# instead. Servers are listed alphabetically within each role, roles are
# ordered by name, groups are separated by a blank line, and servers that
# belong to no role are listed under the :zzz key (presumably so that they
# sort last).
def uptime
  reports = {}

  puts "querying servers..."
  run "uptime" do |channel, stream, out|
    text = out.strip
    reports[channel[:host]] =
      if stream == :err
        { :error => "error: #{text}" }
      elsif (parsed = text.match(/(\S+)\s+up\s+(.*?),\s+(\d+) users?,\s+load averages?: (.*)/))
        # captures: 1 = current time, 2 = uptime, 3 = user count, 4 = loads
        { :uptime => parsed[2].strip.gsub(/  +/, " "),
          :loads  => parsed[4],
          :users  => parsed[3],
          :time   => parsed[1] }
      else
        { :error => "unknown uptime format: #{text}" }
      end
  end

  # column widths are measured over every server before grouping
  host_width   = reports.keys.map { |host| host.length }.max
  uptime_width = reports.values.map { |report| (report[:uptime] || "").length }.max

  # move each report under the first role that lists its host
  grouped = {}
  roles.each do |role_name, members|
    grouped[role_name] = {}
    members.each do |member|
      next unless reports[member.host]
      grouped[role_name][member.host] = reports.delete(member.host)
    end
  end

  # whatever is left belongs to no role
  grouped[:zzz] = reports unless reports.empty?

  first_group = true
  grouped.keys.sort_by { |key| key.to_s }.each do |role_name|
    entries = grouped[role_name]
    next if entries.empty?

    puts "\n" unless first_group
    first_group = false

    entries.keys.sort.each do |host|
      entry = entries[host]
      print "[%-*s] " % [host_width, host]
      if entry[:error]
        puts entry[:error]
      else
        puts "up %*s, load %s" % [uptime_width, entry[:uptime], entry[:loads]]
      end
    end
  end
end

[Validate]