Diagrams of the Fault Tolerance Sample
Loading

Diagrams of the Fault Tolerance Sample

../_images/faulttolerancesample-normal-flow.png

The above diagram illustrates the normal message flow.

Normal flow:

Step Description
1 The progress Listener starts the work.
2 The Worker schedules work by periodically sending Do messages to itself.
3, 4, 5 When receiving Do, the Worker tells the CounterService to increment the counter three times. Each Increment message is forwarded to the Counter, which updates its counter variable and sends the current value to the Storage.
6, 7 The Worker asks the CounterService for the current value of the counter and pipes the result back to the Listener.
../_images/faulttolerancesample-failure-flow.png

The above diagram illustrates what happens in case of storage failure.

Failure flow:

Step Description
1 The Storage throws StorageException.
2 The CounterService is supervisor of the Storage and restarts the Storage when StorageException is thrown.
3, 4, 5, 6 The Storage continues to fail and is restarted.
7 After 3 failures and restarts within 5 seconds the Storage is stopped by its supervisor, i.e. the CounterService.
8 The CounterService is also watching the Storage for termination and receives the Terminated message when the Storage has been stopped ...
9, 10, 11 and tells the Counter that there is no Storage.
12 The CounterService schedules a Reconnect message to itself.
13, 14 When it receives the Reconnect message it creates a new Storage ...
15, 16 and tells the Counter to use the new Storage.

Full Source Code of the Fault Tolerance Sample

  1. import java.util.ArrayList;
  2. import java.util.HashMap;
  3. import java.util.List;
  4. import java.util.Map;
  5.  
  6. import akka.actor.*;
  7. import akka.dispatch.Mapper;
  8. import akka.japi.Function;
  9. import scala.concurrent.duration.Duration;
  10. import akka.util.Timeout;
  11. import akka.event.Logging;
  12. import akka.event.LoggingAdapter;
  13. import com.typesafe.config.Config;
  14. import com.typesafe.config.ConfigFactory;
  15.  
  16. import static akka.japi.Util.classTag;
  17.  
  18. import static akka.actor.SupervisorStrategy.resume;
  19. import static akka.actor.SupervisorStrategy.restart;
  20. import static akka.actor.SupervisorStrategy.stop;
  21. import static akka.actor.SupervisorStrategy.escalate;
  22. import akka.actor.SupervisorStrategy.Directive;
  23. import static akka.pattern.Patterns.ask;
  24. import static akka.pattern.Patterns.pipe;
  25.  
  26. import static docs.actor.japi.FaultHandlingDocSample.WorkerApi.*;
  27. import static docs.actor.japi.FaultHandlingDocSample.CounterServiceApi.*;
  28. import static docs.actor.japi.FaultHandlingDocSample.CounterApi.*;
  29. import static docs.actor.japi.FaultHandlingDocSample.StorageApi.*;
  30.  
  31.  
  32. public class FaultHandlingDocSample {
  33.  
  34. /**
  35. * Runs the sample
  36. */
  37. public static void main(String[] args) {
  38. Config config = ConfigFactory.parseString("akka.loglevel = DEBUG \n" +
  39. "akka.actor.debug.lifecycle = on");
  40.  
  41. ActorSystem system = ActorSystem.create("FaultToleranceSample", config);
  42. ActorRef worker = system.actorOf(Props.create(Worker.class), "worker");
  43. ActorRef listener = system.actorOf(Props.create(Listener.class), "listener");
  44. // start the work and listen on progress
  45. // note that the listener is used as sender of the tell,
  46. // i.e. it will receive replies from the worker
  47. worker.tell(Start, listener);
  48. }
  49.  
  50. /**
  51. * Listens on progress from the worker and shuts down the system when enough
  52. * work has been done.
  53. */
  54. public static class Listener extends UntypedActor {
  55. final LoggingAdapter log = Logging.getLogger(getContext().system(), this);
  56.  
  57. @Override
  58. public void preStart() {
  59. // If we don't get any progress within 15 seconds then the service
  60. // is unavailable
  61. getContext().setReceiveTimeout(Duration.create("15 seconds"));
  62. }
  63.  
  64. public void onReceive(Object msg) {
  65. log.debug("received message {}", msg);
  66. if (msg instanceof Progress) {
  67. Progress progress = (Progress) msg;
  68. log.info("Current progress: {} %", progress.percent);
  69. if (progress.percent >= 100.0) {
  70. log.info("That's all, shutting down");
  71. getContext().system().shutdown();
  72. }
  73. } else if (msg == ReceiveTimeout.getInstance()) {
  74. // No progress within 15 seconds, ServiceUnavailable
  75. log.error("Shutting down due to unavailable service");
  76. getContext().system().shutdown();
  77. } else {
  78. unhandled(msg);
  79. }
  80. }
  81. }
  82.  
  /** Messages understood or emitted by the Worker. */
  public interface WorkerApi {
    /** Tells the Worker to begin; the sender becomes the progress listener. */
    Object Start = "Start";
    /** Tick message the Worker schedules to itself to perform a unit of work. */
    Object Do = "Do";

    /** Progress notification carrying the completed percentage. */
    class Progress {
      public final double percent;

      public Progress(double percent) {
        this.percent = percent;
      }

      /** Renders as {@code SimpleClassName(percent)}. */
      @Override
      public String toString() {
        return getClass().getSimpleName() + "(" + percent + ")";
      }
    }
  }
  99.  
  100.  
  101. /**
  102. * Worker performs some work when it receives the Start message. It will
  103. * continuously notify the sender of the Start message of current Progress.
  104. * The Worker supervise the CounterService.
  105. */
  106. public static class Worker extends UntypedActor {
  107. final LoggingAdapter log = Logging.getLogger(getContext().system(), this);
  108. final Timeout askTimeout = new Timeout(Duration.create(5, "seconds"));
  109.  
  110. // The sender of the initial Start message will continuously be notified
  111. // about progress
  112. ActorRef progressListener;
  113. final ActorRef counterService = getContext().actorOf(
  114. Props.create(CounterService.class), "counter");
  115. final int totalCount = 51;
  116.  
  117. // Stop the CounterService child if it throws ServiceUnavailable
  118. private static SupervisorStrategy strategy = new OneForOneStrategy(-1,
  119. Duration.Inf(), new Function<Throwable, Directive>() {
  120. @Override
  121. public Directive apply(Throwable t) {
  122. if (t instanceof ServiceUnavailable) {
  123. return stop();
  124. } else {
  125. return escalate();
  126. }
  127. }
  128. });
  129.  
  130. @Override
  131. public SupervisorStrategy supervisorStrategy() {
  132. return strategy;
  133. }
  134.  
  135. public void onReceive(Object msg) {
  136. log.debug("received message {}", msg);
  137. if (msg.equals(Start) && progressListener == null) {
  138. progressListener = getSender();
  139. getContext().system().scheduler().schedule(
  140. Duration.Zero(), Duration.create(1, "second"), getSelf(), Do,
  141. getContext().dispatcher(), null
  142. );
  143. } else if (msg.equals(Do)) {
  144. counterService.tell(new Increment(1), getSelf());
  145. counterService.tell(new Increment(1), getSelf());
  146. counterService.tell(new Increment(1), getSelf());
  147.  
  148. // Send current progress to the initial sender
  149. pipe(ask(counterService, GetCurrentCount, askTimeout)
  150. .mapTo(classTag(CurrentCount.class))
  151. .map(new Mapper<CurrentCount, Progress>() {
  152. public Progress apply(CurrentCount c) {
  153. return new Progress(100.0 * c.count / totalCount);
  154. }
  155. }, getContext().dispatcher()), getContext().dispatcher())
  156. .to(progressListener);
  157. } else {
  158. unhandled(msg);
  159. }
  160. }
  161. }
  162.  
  /** Messages and failure type belonging to the CounterService. */
  public interface CounterServiceApi {

    /** Request for the current value of the counter. */
    Object GetCurrentCount = "GetCurrentCount";

    /** A counter key paired with a count value. */
    class CurrentCount {
      public final String key;
      public final long count;

      public CurrentCount(String key, long count) {
        this.key = key;
        this.count = count;
      }

      /** Renders as {@code SimpleClassName(key, count)}. */
      @Override
      public String toString() {
        return getClass().getSimpleName() + "(" + key + ", " + count + ")";
      }
    }

    /** Request to add {@code n} to the counter. */
    class Increment {
      public final long n;

      public Increment(long n) {
        this.n = n;
      }

      /** Renders as {@code SimpleClassName(n)}. */
      @Override
      public String toString() {
        return getClass().getSimpleName() + "(" + n + ")";
      }
    }

    /** Unchecked exception signalling that the service is unavailable. */
    class ServiceUnavailable extends RuntimeException {
      private static final long serialVersionUID = 1L;

      public ServiceUnavailable(String msg) {
        super(msg);
      }
    }

  }
  201.  
  202.  
  203. /**
  204. * Adds the value received in Increment message to a persistent counter.
  205. * Replies with CurrentCount when it is asked for CurrentCount. CounterService
  206. * supervise Storage and Counter.
  207. */
  208. public static class CounterService extends UntypedActor {
  209.  
  210. // Reconnect message
  211. static final Object Reconnect = "Reconnect";
  212.  
  213. private static class SenderMsgPair {
  214. final ActorRef sender;
  215. final Object msg;
  216.  
  217. SenderMsgPair(ActorRef sender, Object msg) {
  218. this.msg = msg;
  219. this.sender = sender;
  220. }
  221. }
  222.  
  223. final LoggingAdapter log = Logging.getLogger(getContext().system(), this);
  224. final String key = getSelf().path().name();
  225. ActorRef storage;
  226. ActorRef counter;
  227. final List<SenderMsgPair> backlog = new ArrayList<SenderMsgPair>();
  228. final int MAX_BACKLOG = 10000;
  229.  
  230. // Restart the storage child when StorageException is thrown.
  231. // After 3 restarts within 5 seconds it will be stopped.
  232. private static SupervisorStrategy strategy = new OneForOneStrategy(3,
  233. Duration.create("5 seconds"), new Function<Throwable, Directive>() {
  234. @Override
  235. public Directive apply(Throwable t) {
  236. if (t instanceof StorageException) {
  237. return restart();
  238. } else {
  239. return escalate();
  240. }
  241. }
  242. });
  243.  
  244. @Override
  245. public SupervisorStrategy supervisorStrategy() {
  246. return strategy;
  247. }
  248.  
  249. @Override
  250. public void preStart() {
  251. initStorage();
  252. }
  253.  
  254. /**
  255. * The child storage is restarted in case of failure, but after 3 restarts,
  256. * and still failing it will be stopped. Better to back-off than
  257. * continuously failing. When it has been stopped we will schedule a
  258. * Reconnect after a delay. Watch the child so we receive Terminated message
  259. * when it has been terminated.
  260. */
  261. void initStorage() {
  262. storage = getContext().watch(getContext().actorOf(
  263. Props.create(Storage.class), "storage"));
  264. // Tell the counter, if any, to use the new storage
  265. if (counter != null)
  266. counter.tell(new UseStorage(storage), getSelf());
  267. // We need the initial value to be able to operate
  268. storage.tell(new Get(key), getSelf());
  269. }
  270.  
  271. @Override
  272. public void onReceive(Object msg) {
  273. log.debug("received message {}", msg);
  274. if (msg instanceof Entry && ((Entry) msg).key.equals(key) &&
  275. counter == null) {
  276. // Reply from Storage of the initial value, now we can create the Counter
  277. final long value = ((Entry) msg).value;
  278. counter = getContext().actorOf(Props.create(Counter.class, key, value));
  279. // Tell the counter to use current storage
  280. counter.tell(new UseStorage(storage), getSelf());
  281. // and send the buffered backlog to the counter
  282. for (SenderMsgPair each : backlog) {
  283. counter.tell(each.msg, each.sender);
  284. }
  285. backlog.clear();
  286. } else if (msg instanceof Increment) {
  287. forwardOrPlaceInBacklog(msg);
  288. } else if (msg.equals(GetCurrentCount)) {
  289. forwardOrPlaceInBacklog(msg);
  290. } else if (msg instanceof Terminated) {
  291. // After 3 restarts the storage child is stopped.
  292. // We receive Terminated because we watch the child, see initStorage.
  293. storage = null;
  294. // Tell the counter that there is no storage for the moment
  295. counter.tell(new UseStorage(null), getSelf());
  296. // Try to re-establish storage after while
  297. getContext().system().scheduler().scheduleOnce(
  298. Duration.create(10, "seconds"), getSelf(), Reconnect,
  299. getContext().dispatcher(), null);
  300. } else if (msg.equals(Reconnect)) {
  301. // Re-establish storage after the scheduled delay
  302. initStorage();
  303. } else {
  304. unhandled(msg);
  305. }
  306. }
  307.  
  308. void forwardOrPlaceInBacklog(Object msg) {
  309. // We need the initial value from storage before we can start delegate to
  310. // the counter. Before that we place the messages in a backlog, to be sent
  311. // to the counter when it is initialized.
  312. if (counter == null) {
  313. if (backlog.size() >= MAX_BACKLOG)
  314. throw new ServiceUnavailable("CounterService not available," +
  315. " lack of initial value");
  316. backlog.add(new SenderMsgPair(getSender(), msg));
  317. } else {
  318. counter.forward(msg, getContext());
  319. }
  320. }
  321. }
  322.  
  323. public interface CounterApi {
  324. public static class UseStorage {
  325. public final ActorRef storage;
  326.  
  327. public UseStorage(ActorRef storage) {
  328. this.storage = storage;
  329. }
  330.  
  331. public String toString() {
  332. return String.format("%s(%s)", getClass().getSimpleName(), storage);
  333. }
  334. }
  335. }
  336.  
  337.  
  338. /**
  339. * The in memory count variable that will send current value to the Storage,
  340. * if there is any storage available at the moment.
  341. */
  342. public static class Counter extends UntypedActor {
  343. final LoggingAdapter log = Logging.getLogger(getContext().system(), this);
  344. final String key;
  345. long count;
  346. ActorRef storage;
  347.  
  348. public Counter(String key, long initialValue) {
  349. this.key = key;
  350. this.count = initialValue;
  351. }
  352.  
  353. @Override
  354. public void onReceive(Object msg) {
  355. log.debug("received message {}", msg);
  356. if (msg instanceof UseStorage) {
  357. storage = ((UseStorage) msg).storage;
  358. storeCount();
  359. } else if (msg instanceof Increment) {
  360. count += ((Increment) msg).n;
  361. storeCount();
  362. } else if (msg.equals(GetCurrentCount)) {
  363. getSender().tell(new CurrentCount(key, count), getSelf());
  364. } else {
  365. unhandled(msg);
  366. }
  367. }
  368.  
  369. void storeCount() {
  370. // Delegate dangerous work, to protect our valuable state.
  371. // We can continue without storage.
  372. if (storage != null) {
  373. storage.tell(new Store(new Entry(key, count)), getSelf());
  374. }
  375. }
  376. }
  377.  
  /** Messages and failure type belonging to the Storage. */
  public interface StorageApi {

    /** Request to save the wrapped entry. */
    class Store {
      public final Entry entry;

      public Store(Entry entry) {
        this.entry = entry;
      }

      /** Renders as {@code SimpleClassName(entry)}. */
      @Override
      public String toString() {
        return getClass().getSimpleName() + "(" + entry + ")";
      }
    }

    /** A key/value pair. */
    class Entry {
      public final String key;
      public final long value;

      public Entry(String key, long value) {
        this.key = key;
        this.value = value;
      }

      /** Renders as {@code SimpleClassName(key, value)}. */
      @Override
      public String toString() {
        return getClass().getSimpleName() + "(" + key + ", " + value + ")";
      }
    }

    /** Request for the value stored under {@code key}. */
    class Get {
      public final String key;

      public Get(String key) {
        this.key = key;
      }

      /** Renders as {@code SimpleClassName(key)}. */
      @Override
      public String toString() {
        return getClass().getSimpleName() + "(" + key + ")";
      }
    }

    /** Unchecked exception signalling a failure of the data store. */
    class StorageException extends RuntimeException {
      private static final long serialVersionUID = 1L;

      public StorageException(String msg) {
        super(msg);
      }
    }
  }
  425.  
  426.  
  427. /**
  428. * Saves key/value pairs to persistent storage when receiving Store message.
  429. * Replies with current value when receiving Get message. Will throw
  430. * StorageException if the underlying data store is out of order.
  431. */
  432. public static class Storage extends UntypedActor {
  433.  
  434. final LoggingAdapter log = Logging.getLogger(getContext().system(), this);
  435. final DummyDB db = DummyDB.instance;
  436.  
  437. @Override
  438. public void onReceive(Object msg) {
  439. log.debug("received message {}", msg);
  440. if (msg instanceof Store) {
  441. Store store = (Store) msg;
  442. db.save(store.entry.key, store.entry.value);
  443. } else if (msg instanceof Get) {
  444. Get get = (Get) msg;
  445. Long value = db.load(get.key);
  446. getSender().tell(new Entry(get.key, value == null ?
  447. Long.valueOf(0L) : value), getSelf());
  448. } else {
  449. unhandled(msg);
  450. }
  451. }
  452. }
  453.  
  454. public static class DummyDB {
  455. public static final DummyDB instance = new DummyDB();
  456. private final Map<String, Long> db = new HashMap<String, Long>();
  457.  
  458. private DummyDB() {
  459. }
  460.  
  461. public synchronized void save(String key, Long value) throws StorageException {
  462. if (11 <= value && value <= 14)
  463. throw new StorageException("Simulated store failure " + value);
  464. db.put(key, value);
  465. }
  466.  
  467. public synchronized Long load(String key) throws StorageException {
  468. return db.get(key);
  469. }
  470. }
  471. }