Distributed System Development with ScalaLoci
Pascal Weisenburger, Mirko Köhler, Guido Salvaneschi TU Darmstadt, Germany
1
ScalaLoci — Pascal Weisenburger, Mirko Köhler, Guido Salvaneschi — PowerPoint PPT Presentation
Distributed System Development with ScalaLoci — Pascal Weisenburger, Mirko Köhler, Guido Salvaneschi, TU Darmstadt, Germany — 1 — class TaskManagerGateway { class TaskManager extends Actor { def
1
// Gateway through which the JobManager talks to a TaskManager actor.
// Fire-and-forget notifications use `!` (tell); request/response calls use
// the ask pattern (`?`) and narrow the untyped reply future via `mapTo`.
class TaskManagerGateway {

  def disconnectFromJobManager(instanceId: InstanceID, cause: Exception, mgr: ActorRef) = {
    mgr ! Disconnect(instanceId, cause)
  }

  def stopCluster(applicationStatus: ApplicationStatus, message: String, mgr: ActorRef) = {
    mgr ! StopCluster(applicationStatus, message)
  }

  def requestStackTrace(mgr: ActorRef) = {
    (mgr ? SendStackTrace).mapTo[StackTrace]
  }

  def submitTask(tdd: TaskDeploymentDescriptor, mgr: ActorRef) = {
    (mgr ? SubmitTask(tdd)).mapTo[Acknowledge]
  }

  def stopTask(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    (mgr ? StopTask(executionAttemptID)).mapTo[Acknowledge]
  }

  def cancelTask(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    // FIX: original read `(mgr ? CancelTask(id).mapTo[Acknowledge]` -- the ask
    // expression was never closed, so this did not parse; close it before mapTo.
    (mgr ? CancelTask(executionAttemptID)).mapTo[Acknowledge]
  }

  def updatePartitions(executionAttemptID: ExecutionAttemptID,
      partitionInfos: Iterable[PartitionInfo], mgr: ActorRef) = {
    (mgr ? UpdateTaskMultiplePartitionInfos(executionAttemptID, partitionInfos))
      .mapTo[Acknowledge]
  }

  def failPartition(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    mgr ! FailIntermediateResultPartitions(executionAttemptID)
  }

  // FIX: `long` is Java syntax, not a Scala type -- use `Long`.
  def notifyCheckpointComplete(executionAttemptID: ExecutionAttemptID, jobId: JobID,
      checkpointId: Long, timestamp: Long, mgr: ActorRef) = {
    mgr ! NotifyCheckpointComplete(jobId, executionAttemptID, checkpointId, timestamp)
  }

  def triggerCheckpoint(executionAttemptID: ExecutionAttemptID, jobId: JobID,
      checkpointId: Long, timestamp: Long, checkpointOptions: CheckpointOptions,
      mgr: ActorRef) = {
    mgr ! TriggerCheckpoint(jobId, executionAttemptID, checkpointId, timestamp,
      checkpointOptions)
  }

  def requestTaskManagerLog(logTypeRequest: LogTypeRequest, mgr: ActorRef) = {
    (mgr ? RequestTaskManagerLog(logTypeRequest)).mapTo[BlobKey]
  }
}

// TaskManager side: the actor that receives and handles the messages the
// gateway above sends.
class TaskManager extends Actor {

  def receive = {
    case SendStackTrace =>
      sendStackTrace() foreach { message =>
        sender ! decorateMessage(message)
      }

    case Disconnect(instanceIdToDisconnect, cause) =>
      // Only honor the disconnect if it targets this instance; a stale message
      // for a previous incarnation is logged and dropped.
      if (instanceIdToDisconnect.equals(instanceID)) {
        handleJobManagerDisconnect("JobManager requested disconnect: " + cause.getMessage())
        triggerTaskManagerRegistration()
      } else {
        log.debug("Received disconnect message for wrong instance id " +
          instanceIdToDisconnect)
      }

    case StopCluster(applicationStatus, message) =>
      log.info(s"Stopping TaskManager with final application status " +
        s"$applicationStatus and diagnostics: $message")
      shutdown()

    case RequestTaskManagerLog(requestType) =>
      blobService match {
        case Some(_) =>
          handleRequestTaskManagerLog(requestType, currentJobManager.get) match {
            case Left(message) => sender() ! message
            case Right(message) => sender() ! message
          }
        case None =>
          sender() ! akka.actor.Status.Failure(new IOException(
            "BlobService not available. Cannot upload TaskManager logs."))
      }

    case UpdateTaskMultiplePartitionInfos(executionID, partitionInfos) =>
      sender ! decorateMessage(updateTaskInputPartitions(executionID, partitionInfos))

    case FailIntermediateResultPartitions(executionID) =>
      log.info(s"Discarding the results produced by task execution $executionID")
      try {
        network.getResultPartitionManager.releasePartitionsProducedBy(executionID)
      } catch {
        case t: Throwable => killTaskManagerFatal(
          "Fatal leak: Unable to release intermediate result partition data", t)
      }

    case SubmitTask(tdd) =>
      sender ! decorateMessage(submitTask(tdd))

    case StopTask(executionID) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        try {
          task.stopExecution()
          sender ! decorateMessage(Acknowledge.get())
        } catch {
          case t: Throwable => sender ! decorateMessage(Status.Failure(t))
        }
      } else {
        // Unknown task: acknowledged anyway so the caller does not block.
        log.debug(s"Cannot find task to stop for execution $executionID)")
        sender ! decorateMessage(Acknowledge.get())
      }

    case CancelTask(executionID) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        task.cancelExecution()
        sender ! decorateMessage(Acknowledge.get())
      } else {
        log.debug(s"Cannot find task to cancel for execution $executionID)")
        sender ! decorateMessage(Acknowledge.get())
      }

    case TriggerCheckpoint(jobId, taskExecutionId, checkpointId, timestamp, checkpointOptions) =>
      log.debug(s"Receiver TriggerCheckpoint $checkpointId@$timestamp " +
        s"for $taskExecutionId.")
      val task = runningTasks.get(taskExecutionId)
      if (task != null) {
        task.triggerCheckpointBarrier(checkpointId, timestamp, checkpointOptions)
      } else {
        log.debug(s"TaskManager received a checkpoint request " +
          s"for unknown task $taskExecutionId.")
      }

    case NotifyCheckpointComplete(jobId, taskExecutionId, checkpointId, timestamp) =>
      log.debug(s"Receiver ConfirmCheckpoint $checkpointId@$timestamp " +
        s"for $taskExecutionId.")
      val task = runningTasks.get(taskExecutionId)
      if (task != null) {
        task.notifyCheckpointComplete(checkpointId)
      } else {
        log.debug(s"TaskManager received a checkpoint confirmation " +
          s"for unknown task $taskExecutionId.")
      }
  }
}
2
// Slide 3: the same TaskManagerGateway/TaskManager excerpt as slide 2, followed
// by three more Flink actor classes. The CheckpointResponder and
// KvStateRegistryListener portions were mangled by the slide-to-text extraction
// (every "o"+following character became a line break) and have been
// reconstructed below.
// NOTE(review): the reconstruction matches Flink-1.3-era JobManager actor code;
// confirm against the original slides.

class TaskManagerGateway {

  def disconnectFromJobManager(instanceId: InstanceID, cause: Exception, mgr: ActorRef) = {
    mgr ! Disconnect(instanceId, cause)
  }

  def stopCluster(applicationStatus: ApplicationStatus, message: String, mgr: ActorRef) = {
    mgr ! StopCluster(applicationStatus, message)
  }

  def requestStackTrace(mgr: ActorRef) = {
    (mgr ? SendStackTrace).mapTo[StackTrace]
  }

  def submitTask(tdd: TaskDeploymentDescriptor, mgr: ActorRef) = {
    (mgr ? SubmitTask(tdd)).mapTo[Acknowledge]
  }

  def stopTask(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    (mgr ? StopTask(executionAttemptID)).mapTo[Acknowledge]
  }

  def cancelTask(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    // FIX: original read `(mgr ? CancelTask(id).mapTo[Acknowledge]` -- the ask
    // expression was never closed, so this did not parse; close it before mapTo.
    (mgr ? CancelTask(executionAttemptID)).mapTo[Acknowledge]
  }

  def updatePartitions(executionAttemptID: ExecutionAttemptID,
      partitionInfos: Iterable[PartitionInfo], mgr: ActorRef) = {
    (mgr ? UpdateTaskMultiplePartitionInfos(executionAttemptID, partitionInfos))
      .mapTo[Acknowledge]
  }

  def failPartition(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    mgr ! FailIntermediateResultPartitions(executionAttemptID)
  }

  // FIX: `long` is Java syntax, not a Scala type -- use `Long`.
  def notifyCheckpointComplete(executionAttemptID: ExecutionAttemptID, jobId: JobID,
      checkpointId: Long, timestamp: Long, mgr: ActorRef) = {
    mgr ! NotifyCheckpointComplete(jobId, executionAttemptID, checkpointId, timestamp)
  }

  def triggerCheckpoint(executionAttemptID: ExecutionAttemptID, jobId: JobID,
      checkpointId: Long, timestamp: Long, checkpointOptions: CheckpointOptions,
      mgr: ActorRef) = {
    mgr ! TriggerCheckpoint(jobId, executionAttemptID, checkpointId, timestamp,
      checkpointOptions)
  }

  def requestTaskManagerLog(logTypeRequest: LogTypeRequest, mgr: ActorRef) = {
    (mgr ? RequestTaskManagerLog(logTypeRequest)).mapTo[BlobKey]
  }
}

// TaskManager side: the actor that receives and handles the messages the
// gateway above sends.
class TaskManager extends Actor {

  def receive = {
    case SendStackTrace =>
      sendStackTrace() foreach { message =>
        sender ! decorateMessage(message)
      }

    case Disconnect(instanceIdToDisconnect, cause) =>
      if (instanceIdToDisconnect.equals(instanceID)) {
        handleJobManagerDisconnect("JobManager requested disconnect: " + cause.getMessage())
        triggerTaskManagerRegistration()
      } else {
        log.debug("Received disconnect message for wrong instance id " +
          instanceIdToDisconnect)
      }

    case StopCluster(applicationStatus, message) =>
      log.info(s"Stopping TaskManager with final application status " +
        s"$applicationStatus and diagnostics: $message")
      shutdown()

    case RequestTaskManagerLog(requestType) =>
      blobService match {
        case Some(_) =>
          handleRequestTaskManagerLog(requestType, currentJobManager.get) match {
            case Left(message) => sender() ! message
            case Right(message) => sender() ! message
          }
        case None =>
          sender() ! akka.actor.Status.Failure(new IOException(
            "BlobService not available. Cannot upload TaskManager logs."))
      }

    case UpdateTaskMultiplePartitionInfos(executionID, partitionInfos) =>
      sender ! decorateMessage(updateTaskInputPartitions(executionID, partitionInfos))

    case FailIntermediateResultPartitions(executionID) =>
      log.info(s"Discarding the results produced by task execution $executionID")
      try {
        network.getResultPartitionManager.releasePartitionsProducedBy(executionID)
      } catch {
        case t: Throwable => killTaskManagerFatal(
          "Fatal leak: Unable to release intermediate result partition data", t)
      }

    case SubmitTask(tdd) =>
      sender ! decorateMessage(submitTask(tdd))

    case StopTask(executionID) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        try {
          task.stopExecution()
          sender ! decorateMessage(Acknowledge.get())
        } catch {
          case t: Throwable => sender ! decorateMessage(Status.Failure(t))
        }
      } else {
        log.debug(s"Cannot find task to stop for execution $executionID)")
        sender ! decorateMessage(Acknowledge.get())
      }

    case CancelTask(executionID) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        task.cancelExecution()
        sender ! decorateMessage(Acknowledge.get())
      } else {
        log.debug(s"Cannot find task to cancel for execution $executionID)")
        sender ! decorateMessage(Acknowledge.get())
      }

    case TriggerCheckpoint(jobId, taskExecutionId, checkpointId, timestamp, checkpointOptions) =>
      log.debug(s"Receiver TriggerCheckpoint $checkpointId@$timestamp " +
        s"for $taskExecutionId.")
      val task = runningTasks.get(taskExecutionId)
      if (task != null) {
        task.triggerCheckpointBarrier(checkpointId, timestamp, checkpointOptions)
      } else {
        log.debug(s"TaskManager received a checkpoint request " +
          s"for unknown task $taskExecutionId.")
      }

    case NotifyCheckpointComplete(jobId, taskExecutionId, checkpointId, timestamp) =>
      log.debug(s"Receiver ConfirmCheckpoint $checkpointId@$timestamp " +
        s"for $taskExecutionId.")
      val task = runningTasks.get(taskExecutionId)
      if (task != null) {
        task.notifyCheckpointComplete(checkpointId)
      } else {
        log.debug(s"TaskManager received a checkpoint confirmation " +
          s"for unknown task $taskExecutionId.")
      }
  }
}

// Local actor on the TaskManager that un-registers finished tasks, escalates
// fatal errors, and forwards task execution-state updates to the JobManager.
class TaskManagerActions extends Actor {

  def receive = {
    case TaskInFinalState(executionID) =>
      val task = runningTasks.remove(executionID)
      if (task != null) {
        // the task must be in a terminal state
        if (!task.getExecutionState.isTerminal) {
          try {
            task.failExternally(new Exception("Task is being removed from TaskManager"))
          } catch {
            case e: Exception => log.error("Could not properly fail task", e)
          }
        }
        log.info(s"Un-registering task and sending final execution state " +
          s"${task.getExecutionState} to JobManager for task " +
          s"${task.getTaskInfo.getTaskName} (${task.getExecutionId})")
        val accumulators = {
          val registry = task.getAccumulatorRegistry
          registry.getSnapshot
        }
        self ! decorateMessage(
          UpdateTaskExecutionState(
            new TaskExecutionState(
              task.getJobID,
              task.getExecutionId,
              task.getExecutionState,
              task.getFailureCause,
              accumulators,
              task.getMetricGroup.getIOMetricGroup.createSnapshot())
          )
        )
      } else {
        log.error(s"Cannot find task with ID $executionID to unregister.")
      }

    case FatalError(message, cause) =>
      log.error("\n" +
        "==============================================================\n" +
        "====================== FATAL =======================\n" +
        "==============================================================\n" +
        "\n" +
        "A fatal error occurred, forcing the TaskManager to shut down: " + message, cause)
      self ! Kill

    case FailTask(executionID, cause) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        task.failExternally(cause)
      } else {
        log.debug(s"Cannot find task to fail for execution $executionID)")
      }

    case UpdateTaskExecutionState(taskExecutionState: TaskExecutionState) =>
      // we receive these from our tasks and forward them to the JobManager
      currentJobManager foreach { jobManager => {
        val futureResponse = (jobManager ?
          decorateMessage(UpdateTaskExecutionState(taskExecutionState)))(askTimeout)
        val executionID = taskExecutionState.getID
        futureResponse.mapTo[Boolean].onComplete {
          // IMPORTANT: In the future callback, we cannot directly modify state
          // but only send messages to the TaskManager to do those changes
          case scala.util.Success(result) =>
            if (!result) {
              self ! decorateMessage(
                FailTask(
                  executionID,
                  new Exception("Task has been cancelled on the JobManager."))
              )
            }
          case scala.util.Failure(t) =>
            self ! decorateMessage(
              FailTask(
                executionID,
                new Exception(
                  "Failed to send ExecutionStateChange notification to JobManager", t))
            )
        }(context.dispatcher)
      } }
  }
}

// JobManager-side actor that routes checkpoint acknowledge/decline messages
// from TaskManagers to the job's CheckpointCoordinator.
class CheckpointResponder extends Actor {

  def receive = {
    case ackMessage: AcknowledgeCheckpoint =>
      val jid = ackMessage.getJob()
      currentJobs.get(jid) match {
        case Some((graph, _)) =>
          val checkpointCoordinator = graph.getCheckpointCoordinator()
          if (checkpointCoordinator != null) {
            // Handled off the actor thread: coordinator calls may block.
            future {
              try {
                if (!checkpointCoordinator.receiveAcknowledgeMessage(ackMessage)) {
                  log.info("Received message for non-existing checkpoint " +
                    ackMessage.getCheckpointId)
                }
              } catch {
                case t: Throwable =>
                  log.error(s"Error in CheckpointCoordinator while processing $ackMessage", t)
              }
            }(context.dispatcher)
          } else {
            log.error(s"Received AcknowledgeCheckpoint message for job $jid with no " +
              s"CheckpointCoordinator")
          }
        case None =>
          log.error(s"Received AcknowledgeCheckpoint for unavailable job $jid")
      }

    case declineMessage: DeclineCheckpoint =>
      val jid = declineMessage.getJob()
      currentJobs.get(jid) match {
        case Some((graph, _)) =>
          val checkpointCoordinator = graph.getCheckpointCoordinator()
          if (checkpointCoordinator != null) {
            future {
              try {
                checkpointCoordinator.receiveDeclineMessage(declineMessage)
              } catch {
                case t: Throwable =>
                  // FIX: original was missing the closing `)` after `, t`.
                  log.error(s"Error in CheckpointCoordinator while processing $declineMessage", t)
              }
            }(context.dispatcher)
          } else {
            log.error(s"Received DeclineCheckpoint message for job $jid with no " +
              s"CheckpointCoordinator")
          }
        case None =>
          log.error(s"Received DeclineCheckpoint for unavailable job $jid")
      }
  }
}

// JobManager-side actor that mirrors TaskManager KvState (un)registration
// notifications into the job's KvStateLocationRegistry.
class KvStateRegistryListener extends Actor {

  def receive = {
    case msg: NotifyKvStateRegistered =>
      currentJobs.get(msg.getJobId) match {
        case Some((graph, _)) =>
          try {
            log.debug(s"Key value state registered for job ${msg.getJobId} under " +
              s"name ${msg.getRegistrationName}.")
            graph.getKvStateLocationRegistry.notifyKvStateRegistered(
              msg.getJobVertexId,
              msg.getKeyGroupRange,
              msg.getRegistrationName,
              msg.getKvStateId,
              msg.getKvStateServerAddress)
          } catch {
            case t: Throwable =>
              log.error(s"Failed to notify KvStateRegistry about registration $msg.")
          }
        case None =>
          log.error(s"Received $msg for unavailable job.")
      }

    // TaskManager KvState unregistration
    case msg: NotifyKvStateUnregistered =>
      currentJobs.get(msg.getJobId) match {
        case Some((graph, _)) =>
          try {
            graph.getKvStateLocationRegistry.notifyKvStateUnregistered(
              msg.getJobVertexId,
              msg.getKeyGroupRange,
              msg.getRegistrationName)
          } catch {
            case t: Throwable =>
              log.error(s"Failed to notify KvStateRegistry about registration $msg.")
          }
        case None =>
          log.error(s"Received $msg for unavailable job.")
      }
  }
}
3
4
trait Registry extends Peer trait Node extends Peer
val message: Event[String] on Registry = placed { getMessageStream() }
5
Node Node Node Node Node Registry
trait Registry extends Peer { type Tie <: Multiple[Node] } trait Node extends Peer { type Tie <: Single[Registry] with Multiple[Node] }
6
trait Registry extends Peer { type Tie <: Single[Node] } trait Node extends Peer { type Tie <: Single[Registry] } val message: Event[String] on Node placed[Registry] { message.asLocal: Event[String] }
Node Registry
7
trait Registry extends Peer { type Tie <: Multiple[Node] } trait Node extends Peer { type Tie <: Single[Registry] } val message: Event[String] on Node placed[Registry] { message.asLocalFromAll: Map[Remote[Node], Event[String]] }
Node Node Node Registry
8
9
Node Registry message process message result
val message = Event[String]() val result = message map processMessage val ui = new UI(result)
10
val message: Event[String] on Node = placed[Node] { Event[String]() } val result = placed[Registry] { message.asLocal map processMessage } val ui = placed[Node] { new UI(result.asLocal) }
Node Registry message process message result
11
@multitier object Chat { trait Registry extends Peer { type Tie <: Multiple[Node] } trait Node extends Peer { type Tie <: Single[Registry] } val message = placed[Node] { Event[String]() } val publicMessage = placed[Registry] { message.asLocalFromAllSeq map { case (_, msg) => msg } } placed[Node].main { publicMessage.asLocal observe println for (line <- io.Source.stdin.getLines) message.fire(line) } } Registry Node
println
Node
println public Message message message
12
13
14
Master Proxy Master Worker
trait MasterProxy extends Peer { type Tie <: Multiple[Master] with Multiple[Worker] } trait Worker extends Peer { type Tie <: Single[MasterProxy] with Optional[Master] } trait Master extends Peer { type Tie <: Multiple[MasterProxy] with Multiple[Worker] }
15
Multiple modules CheckpointResponder KvStateRegistryListener PartitionProducerStateChecker ResultPartitionConsumableNotifier TaskManager TaskManagerActions Eliminated 23 non-exhaustive pattern matches and 8 type casts
16
Latency
4 6 8 Number of workers 1000 800 600 400 200 Latency [ms]
Flink ScalaLoci Flink
Cumulative Distribution
0.2 0.4 0.6 0.8 1.0 Fraction of tuples complete 1500 1000 500
Flink, 4 workers Flink, 6 workers Flink, 8 workers ScalaLoci Flink, 4 workers ScalaLoci Flink, 6 workers ScalaLoci Flink, 8 workers
17
18
19
Node Node Node Node Node Registry
Node Registry message process message result
Value on Peer
20