pinglog.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. package pinglog
  2. import (
  3. "fmt"
  4. "github.com/gogf/gf/v2/database/gdb"
  5. "github.com/gogf/gf/v2/frame/g"
  6. "github.com/gogf/gf/v2/os/glog"
  7. "github.com/gogf/gf/v2/os/gtime"
  8. "golang.org/x/net/context"
  9. "nodeMonitor/internal/consts"
  10. "nodeMonitor/internal/dao"
  11. "nodeMonitor/internal/model"
  12. "nodeMonitor/internal/model/do"
  13. "nodeMonitor/internal/service"
  14. )
  15. type (
  16. sPing struct{}
  17. )
  18. func init() {
  19. service.RegisterPing(New())
  20. }
  21. func New() *sPing {
  22. return &sPing{}
  23. }
  24. // Create 创建延迟日志
  25. func (c *sPing) Create(ctx context.Context, ping model.PingSt, setverid int) error {
  26. return dao.Pinglog.Transaction(ctx, func(ctx context.Context, tx gdb.TX) error {
  27. _, err := dao.Pinglog.Ctx(ctx).Data(do.Pinglog{
  28. Maxdelay: ping.MaxDelay,
  29. Mindelay: ping.MinDelay,
  30. Avgdelay: ping.AvgDelay,
  31. Losspk: ping.LossPk,
  32. Serverid: setverid,
  33. }).Insert()
  34. return err
  35. })
  36. }
  37. func (c *sPing) GetHyStatus(ctx context.Context, ping model.PingSt, serverid int) (bool, error) {
  38. pingConfig, err := service.PingConfig().Get(ctx)
  39. if err != nil {
  40. glog.Debug(ctx, err.Error())
  41. return false, err
  42. }
  43. // 查询节点上一次有离线记录的时间
  44. var lastDownTime *gtime.Time
  45. err = dao.Pinglog.Ctx(ctx).
  46. Where("avgdelay = 0").
  47. Where("losspk > ?", pingConfig.NodeLoos).
  48. Where("maxdelay = 0").
  49. Where("serverid = ?", serverid).
  50. OrderDesc("create_at").
  51. Limit(1).
  52. Scan(&lastDownTime)
  53. if err != nil {
  54. glog.Debug(ctx, err.Error())
  55. return false, err
  56. }
  57. // 如果没有找到上一次的离线记录,返回 false
  58. if lastDownTime == nil {
  59. return false, nil
  60. }
  61. return true, nil
  62. }
  63. // GetStatus 查询节点状态
  64. // SELECT * FROM pinglog WHERE create_at >= DATE_SUB(NOW(),INTERVAL 10 MINUTE) and avgdelay = 0 and losspk >= 0 and maxdelay = 0 and `serverid` = '1'gf
  65. func (c *sPing) GetStatus(ctx context.Context, serverid int) (consts.NodeStatus, error) {
  66. //查询10分钟内的数据
  67. startTime, err := g.Cfg().Get(ctx, "node.startTime")
  68. if err != nil {
  69. glog.Debug(ctx, err.Error())
  70. return consts.Normal, err
  71. }
  72. //查询的数据超过3次就是不ok了
  73. //loos, err := g.Cfg().Get(ctx, "node.loos")
  74. //if err != nil {
  75. // glog.Debug(ctx, err.Error())
  76. // return false, err
  77. //}
  78. // 获取 ping 配置
  79. pingConfig, err := service.PingConfig().Get(ctx)
  80. if err != nil {
  81. glog.Debug(ctx, err.Error())
  82. return consts.Normal, err
  83. }
  84. // 查询最近10分钟内的 ping 日志,判断节点是否不通
  85. failCount, err := dao.Pinglog.Ctx(ctx).
  86. Where("create_at > DATE_SUB(NOW(), INTERVAL ? MINUTE)", startTime).
  87. Where("avgdelay = 0").
  88. Where("losspk > ?", pingConfig.NodeLoos).
  89. Where("maxdelay = 0").
  90. Where("serverid = ?", serverid).
  91. Count()
  92. if err != nil {
  93. glog.Debug(ctx, err.Error())
  94. return consts.Normal, err
  95. }
  96. // 如果节点当前是离线状态,记录下离线时间并返回 Down
  97. if failCount >= pingConfig.NodeDie {
  98. lastDownTime := gtime.Now()
  99. err := service.Node().UpdateNodeLasDowTime(ctx, serverid, lastDownTime)
  100. if err != nil {
  101. glog.Debug(ctx, err.Error())
  102. return consts.Normal, err
  103. }
  104. return consts.Down, nil
  105. }
  106. // 查询节点上一次有离线记录的时间
  107. lastDownTime, err := service.Node().GetLasDowTime(ctx, serverid)
  108. if err != nil {
  109. glog.Debug(ctx, err.Error())
  110. return consts.Normal, err
  111. }
  112. glog.Info(ctx, fmt.Sprintf("当前节点ID%d 查询上次离线记录成功", serverid))
  113. // 如果没有找到上一次的离线记录,返回 Normal
  114. if lastDownTime == nil {
  115. glog.Info(ctx, fmt.Sprintf("当前节点ID%d 离线记录不为空 返回 Normal", serverid))
  116. return consts.Normal, nil
  117. }
  118. // 查询最近10分钟内的 ping 日志,判断节点是否恢复
  119. successCount, err := dao.Pinglog.Ctx(ctx).
  120. Where("create_at > ?", lastDownTime).
  121. Where("avgdelay > 0").
  122. Where("serverid = ?", serverid).
  123. Count()
  124. if err != nil {
  125. glog.Debug(ctx, err.Error())
  126. return consts.Normal, err
  127. }
  128. glog.Info(ctx, fmt.Sprintf("当前节点ID%d 查询 节点是否恢复成功", serverid))
  129. // 如果节点之前是离线状态且现在恢复了,返回 Recovered
  130. // 如果节点之前是离线状态且现在恢复了,清除离线记录并返回 Recovered
  131. if successCount >= pingConfig.NodeRecover {
  132. glog.Info(ctx, fmt.Sprintf("当前节点ID%d 查询 之前是离线状态且现在恢复了返回 清除离线记录并返回 Recovered", serverid))
  133. err := service.Node().UpdateNodeLasDowTime(ctx, serverid, nil)
  134. if err != nil {
  135. glog.Debug(ctx, err.Error())
  136. return consts.Normal, err
  137. }
  138. return consts.Recovered, nil
  139. }
  140. // 返回正常状态
  141. return consts.Normal, nil
  142. }
  143. func (c *sPing) ClearLog(ctx context.Context) {
  144. clearLogTime, err := g.Cfg().Get(ctx, "node.clearLogTime")
  145. if err != nil {
  146. glog.Debug(ctx, err.Error())
  147. return
  148. }
  149. timeStartStr := gtime.Now().AddDate(0, 0, -clearLogTime.Int()).String()
  150. glog.Info(ctx, timeStartStr)
  151. _, err = dao.Pinglog.Ctx(ctx).Where("create_at <= ?", timeStartStr).Delete()
  152. if err != nil {
  153. glog.Error(ctx, err)
  154. return
  155. }
  156. }