This is Unofficial EPICS BASE Doxygen Site
tcpRecvWatchdog.cpp
Go to the documentation of this file.
1 /*************************************************************************\
2 * Copyright (c) 2002 The University of Chicago, as Operator of Argonne
3 * National Laboratory.
4 * Copyright (c) 2002 The Regents of the University of California, as
5 * Operator of Los Alamos National Laboratory.
6 * EPICS BASE is distributed subject to a Software License Agreement found
7 * in file LICENSE that is included with this distribution.
8 \*************************************************************************/
9 /*
10  *
11  * L O S A L A M O S
12  * Los Alamos National Laboratory
13  * Los Alamos, New Mexico 87545
14  *
15  * Copyright, 1986, The Regents of the University of California.
16  *
17  * Author: Jeff Hill
18  */
19 
20 #define epicsAssertAuthor "Jeff Hill johill@lanl.gov"
21 
22 #include "iocinf.h"
23 #include "cac.h"
24 #include "virtualCircuit.h"
25 
26 //
27 // the recv watchdog timer is active when this object is created
28 //
30  ( epicsMutex & cbMutexIn, cacContextNotify & ctxNotifyIn,
31  epicsMutex & mutexIn, tcpiiu & iiuIn,
32  double periodIn, epicsTimerQueue & queueIn ) :
33  period ( periodIn ), timer ( queueIn.createTimer () ),
34  cbMutex ( cbMutexIn ), ctxNotify ( ctxNotifyIn ),
35  mutex ( mutexIn ), iiu ( iiuIn ),
36  probeResponsePending ( false ), beaconAnomaly ( true ),
37  probeTimeoutDetected ( false ), shuttingDown ( false )
38 {
39 }
40 
42 {
43  this->timer.destroy ();
44 }
45 
46 epicsTimerNotify::expireStatus
47 tcpRecvWatchdog::expire ( const epicsTime & /* currentTime */ )
48 {
49  epicsGuard < epicsMutex > guard ( this->mutex );
50  if ( this->shuttingDown ) {
51  return noRestart;
52  }
53  if ( this->probeResponsePending ) {
54  if ( this->iiu.receiveThreadIsBusy ( guard ) ) {
55  return expireStatus ( restart, CA_ECHO_TIMEOUT );
56  }
57 
58  {
59 # ifdef DEBUG
60  char hostName[128];
61  this->iiu.getHostName ( guard, hostName, sizeof (hostName) );
62  debugPrintf ( ( "CA server \"%s\" unresponsive after %g inactive sec"
63  "- disconnecting.\n",
64  hostName, this->period ) );
65 # endif
66  // to get the callback lock safely we must reorder
67  // the lock hierarchy
68  epicsGuardRelease < epicsMutex > unguard ( guard );
69  {
70  // callback lock is required because channel disconnect
71  // state change is initiated from this thread, and
72  // this can cause their disconnect notify callback
73  // to be invoked.
74  callbackManager mgr ( this->ctxNotify, this->cbMutex );
75  epicsGuard < epicsMutex > tmpGuard ( this->mutex );
76  this->iiu.receiveTimeoutNotify ( mgr, tmpGuard );
77  this->probeTimeoutDetected = true;
78  }
79  }
80  return noRestart;
81  }
82  else {
83  if ( this->iiu.receiveThreadIsBusy ( guard ) ) {
84  return expireStatus ( restart, this->period );
85  }
86  this->probeTimeoutDetected = false;
87  this->probeResponsePending = this->iiu.setEchoRequestPending ( guard );
88  debugPrintf ( ("circuit timed out - sending echo request\n") );
89  return expireStatus ( restart, CA_ECHO_TIMEOUT );
90  }
91 }
92 
95 {
96  guard.assertIdenticalMutex ( this->mutex );
97  if ( ! ( this->shuttingDown || this->beaconAnomaly || this->probeResponsePending ) ) {
98  this->timer.start ( *this, this->period );
99  debugPrintf ( ("saw a normal beacon - reseting circuit receive watchdog\n") );
100  }
101 }
102 
103 //
104 // be careful about using beacons to reset the connection
105 // time out watchdog until we have received a ping response
106 // from the IOC (this makes the software detect reconnects
107 // faster when the server is rebooted twice in rapid
108 // succession before a 1st or 2nd beacon has been received)
109 //
111  epicsGuard < epicsMutex > & guard )
112 {
113  guard.assertIdenticalMutex ( this->mutex );
114  this->beaconAnomaly = true;
115  debugPrintf ( ("Saw an abnormal beacon\n") );
116 }
117 
119  epicsGuard < epicsMutex > & guard )
120 {
121  guard.assertIdenticalMutex ( this->mutex );
122 
123  if ( ! ( this->shuttingDown || this->probeResponsePending ) ) {
124  this->beaconAnomaly = false;
125  this->timer.start ( *this, this->period );
126  debugPrintf ( ("received a message - reseting circuit recv watchdog\n") );
127  }
128 }
129 
131  epicsGuard < epicsMutex > & cbGuard )
132 {
133  bool restartNeeded = false;
134  double restartDelay = DBL_MAX;
135  {
136  epicsGuard < epicsMutex > guard ( this->mutex );
137  if ( this->probeResponsePending && ! this->shuttingDown ) {
138  restartNeeded = true;
139  if ( this->probeTimeoutDetected ) {
140  this->probeTimeoutDetected = false;
141  this->probeResponsePending = this->iiu.setEchoRequestPending ( guard );
142  restartDelay = CA_ECHO_TIMEOUT;
143  debugPrintf ( ("late probe response - sending another probe request\n") );
144  }
145  else {
146  this->probeResponsePending = false;
147  restartDelay = this->period;
148  this->iiu.responsiveCircuitNotify ( cbGuard, guard );
149  debugPrintf ( ("probe response on time - circuit was tagged reponsive if unresponsive\n") );
150  }
151  }
152  }
153  if ( restartNeeded ) {
154  this->timer.start ( *this, restartDelay );
155  debugPrintf ( ("recv wd restarted with delay %f\n", restartDelay) );
156  }
157 }
158 
159 //
160 // The thread for outgoing requests in the client runs
161 // at a higher priority than the thread in the client
162 // that receives responses. Therefore, there could
163 // be a considerable large array write send backlog that
164 // is delaying departure of an echo request and also
165 // interrupting delivery of an echo response.
166 // We must be careful not to timeout the echo response as
167 // long as we see indication of regular departures of
168 // message buffers from the client in a situation where
169 // we know that the TCP send queueing has been exceeded.
170 // The send watchdog will be responsible for detecting
171 // dead connections in this case.
172 //
174  epicsGuard < epicsMutex > & guard )
175 {
176  guard.assertIdenticalMutex ( this->mutex );
177 
178  // We dont set "beaconAnomaly" to be false here because, after we see a
179  // beacon anomaly (which could be transiently detecting a reboot) we will
180  // not trust the beacon as an indicator of a healthy server until we
181  // receive at least one message from the server.
182  if ( this->probeResponsePending && ! this->shuttingDown ) {
183  this->timer.start ( *this, CA_ECHO_TIMEOUT );
184  debugPrintf ( ("saw heavy send backlog - reseting circuit recv watchdog\n") );
185  }
186 }
187 
189  epicsGuard < epicsMutex > & guard )
190 {
191  guard.assertIdenticalMutex ( this->mutex );
192  if ( this->shuttingDown ) {
193  return;
194  }
195  this->timer.start ( *this, this->period );
196  debugPrintf ( ("connected to the server - initiating circuit recv watchdog\n") );
197 }
198 
200  epicsGuard < epicsMutex > & /* cbGuard */,
201  epicsGuard < epicsMutex > & guard )
202 {
203  guard.assertIdenticalMutex ( this->mutex );
204 
205  bool restartNeeded = false;
206  if ( ! ( this->probeResponsePending || this->shuttingDown ) ) {
207  this->probeResponsePending = this->iiu.setEchoRequestPending ( guard );
208  restartNeeded = true;
209  }
210  if ( restartNeeded ) {
211  this->timer.start ( *this, CA_ECHO_TIMEOUT );
212  }
213  debugPrintf ( ("TCP send timed out - sending echo request\n") );
214 }
215 
217 {
218  this->timer.cancel ();
219  debugPrintf ( ("canceling TCP recv watchdog\n") );
220 }
221 
223 {
224  {
225  epicsGuard < epicsMutex > guard ( this->mutex );
226  this->shuttingDown = true;
227  }
228  this->timer.cancel ();
229  debugPrintf ( ("canceling TCP recv watchdog\n") );
230 }
231 
232 double tcpRecvWatchdog::delay () const
233 {
234  return this->timer.getExpireDelay ();
235 }
236 
237 void tcpRecvWatchdog::show ( unsigned level ) const
238 {
239  epicsGuard < epicsMutex > guard ( this->mutex );
240 
241  ::printf ( "Receive virtual circuit watchdog at %p, period %f\n",
242  static_cast <const void *> ( this ), this->period );
243  if ( level > 0u ) {
244  ::printf ( "\t%s %s %s\n",
245  this->probeResponsePending ? "probe-response-pending" : "",
246  this->beaconAnomaly ? "beacon-anomaly-detected" : "",
247  this->probeTimeoutDetected ? "probe-response-timeout" : "" );
248  }
249 }
250 
void sendTimeoutNotify(epicsGuard< epicsMutex > &cbGuard, epicsGuard< epicsMutex > &guard)
virtual ~tcpRecvWatchdog()
void destroy()
Definition: timer.cpp:47
#define printf
Definition: epicsStdio.h:41
void assertIdenticalMutex(const T &) const
Definition: epicsGuard.h:80
void receiveTimeoutNotify(callbackManager &, epicsGuard< epicsMutex > &)
Definition: tcpiiu.cpp:889
void sendBacklogProgressNotify(epicsGuard< epicsMutex > &)
void responsiveCircuitNotify(epicsGuard< epicsMutex > &cbGuard, epicsGuard< epicsMutex > &guard)
Definition: tcpiiu.cpp:860
double delay() const
void probeResponseNotify(epicsGuard< epicsMutex > &)
epicsMutex mutex
Definition: pvAccess.cpp:71
void show(unsigned level) const
void connectNotify(epicsGuard< epicsMutex > &)
void beaconAnomalyNotify(epicsGuard< epicsMutex > &)
void messageArrivalNotify(epicsGuard< epicsMutex > &guard)
void cancel()
Definition: timer.cpp:135
bool receiveThreadIsBusy(epicsGuard< epicsMutex > &)
#define debugPrintf(argsInParen)
Definition: iocinf.h:30
void beaconArrivalNotify(epicsGuard< epicsMutex > &)
tcpRecvWatchdog(epicsMutex &cbMutex, cacContextNotify &ctxNotify, epicsMutex &mutex, tcpiiu &, double periodIn, epicsTimerQueue &)
unsigned getHostName(epicsGuard< epicsMutex > &, char *pBuf, unsigned bufLength) const
Definition: tcpiiu.cpp:1791
void start(class epicsTimerNotify &, const epicsTime &)
Definition: timer.cpp:59
bool setEchoRequestPending(epicsGuard< epicsMutex > &)
Definition: tcpiiu.cpp:1121