Distributed System Development with ScalaLoci
Pascal Weisenburger, Mirko Köhler, Guido Salvaneschi TU Darmstadt, Germany
1
ScalaLoci — Pascal Weisenburger, Mirko Köhler, Guido Salvaneschi — PowerPoint PPT Presentation
Distributed System Development with ScalaLoci — Pascal Weisenburger, Mirko Köhler, Guido Salvaneschi, TU Darmstadt, Germany — 1 — class TaskManagerGateway { class TaskManager extends Actor { def
1
// Gateway through which the JobManager talks to a TaskManager actor.
// Fire-and-forget notifications use `!` (tell); request/response calls use
// the ask pattern (`?`) and narrow the untyped reply future via `mapTo`.
class TaskManagerGateway {

  def disconnectFromJobManager(instanceId: InstanceID, cause: Exception, mgr: ActorRef) = {
    mgr ! Disconnect(instanceId, cause)
  }

  def stopCluster(applicationStatus: ApplicationStatus, message: String, mgr: ActorRef) = {
    mgr ! StopCluster(applicationStatus, message)
  }

  def requestStackTrace(mgr: ActorRef) = {
    (mgr ? SendStackTrace).mapTo[StackTrace]
  }

  def submitTask(tdd: TaskDeploymentDescriptor, mgr: ActorRef) = {
    (mgr ? SubmitTask(tdd)).mapTo[Acknowledge]
  }

  def stopTask(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    (mgr ? StopTask(executionAttemptID)).mapTo[Acknowledge]
  }

  def cancelTask(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    // FIX: original read `(mgr ? CancelTask(id).mapTo[Acknowledge]` -- the ask
    // expression was never closed, so this did not parse; close it before mapTo.
    (mgr ? CancelTask(executionAttemptID)).mapTo[Acknowledge]
  }

  def updatePartitions(executionAttemptID: ExecutionAttemptID,
      partitionInfos: Iterable[PartitionInfo], mgr: ActorRef) = {
    (mgr ? UpdateTaskMultiplePartitionInfos(executionAttemptID, partitionInfos))
      .mapTo[Acknowledge]
  }

  def failPartition(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    mgr ! FailIntermediateResultPartitions(executionAttemptID)
  }

  // FIX: `long` is Java syntax, not a Scala type -- use `Long`.
  def notifyCheckpointComplete(executionAttemptID: ExecutionAttemptID, jobId: JobID,
      checkpointId: Long, timestamp: Long, mgr: ActorRef) = {
    mgr ! NotifyCheckpointComplete(jobId, executionAttemptID, checkpointId, timestamp)
  }

  def triggerCheckpoint(executionAttemptID: ExecutionAttemptID, jobId: JobID,
      checkpointId: Long, timestamp: Long, checkpointOptions: CheckpointOptions,
      mgr: ActorRef) = {
    mgr ! TriggerCheckpoint(jobId, executionAttemptID, checkpointId, timestamp,
      checkpointOptions)
  }

  def requestTaskManagerLog(logTypeRequest: LogTypeRequest, mgr: ActorRef) = {
    (mgr ? RequestTaskManagerLog(logTypeRequest)).mapTo[BlobKey]
  }
}

// TaskManager side: the actor that receives and handles the messages the
// gateway above sends.
class TaskManager extends Actor {

  def receive = {
    case SendStackTrace =>
      sendStackTrace() foreach { message =>
        sender ! decorateMessage(message)
      }

    case Disconnect(instanceIdToDisconnect, cause) =>
      // Only honor the disconnect if it targets this instance; a stale message
      // for a previous incarnation is logged and dropped.
      if (instanceIdToDisconnect.equals(instanceID)) {
        handleJobManagerDisconnect("JobManager requested disconnect: " + cause.getMessage())
        triggerTaskManagerRegistration()
      } else {
        log.debug("Received disconnect message for wrong instance id " +
          instanceIdToDisconnect)
      }

    case StopCluster(applicationStatus, message) =>
      log.info(s"Stopping TaskManager with final application status " +
        s"$applicationStatus and diagnostics: $message")
      shutdown()

    case RequestTaskManagerLog(requestType) =>
      blobService match {
        case Some(_) =>
          handleRequestTaskManagerLog(requestType, currentJobManager.get) match {
            case Left(message) => sender() ! message
            case Right(message) => sender() ! message
          }
        case None =>
          sender() ! akka.actor.Status.Failure(new IOException(
            "BlobService not available. Cannot upload TaskManager logs."))
      }

    case UpdateTaskMultiplePartitionInfos(executionID, partitionInfos) =>
      sender ! decorateMessage(updateTaskInputPartitions(executionID, partitionInfos))

    case FailIntermediateResultPartitions(executionID) =>
      log.info(s"Discarding the results produced by task execution $executionID")
      try {
        network.getResultPartitionManager.releasePartitionsProducedBy(executionID)
      } catch {
        case t: Throwable => killTaskManagerFatal(
          "Fatal leak: Unable to release intermediate result partition data", t)
      }

    case SubmitTask(tdd) =>
      sender ! decorateMessage(submitTask(tdd))

    case StopTask(executionID) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        try {
          task.stopExecution()
          sender ! decorateMessage(Acknowledge.get())
        } catch {
          case t: Throwable => sender ! decorateMessage(Status.Failure(t))
        }
      } else {
        // Unknown task: acknowledged anyway so the caller does not block.
        log.debug(s"Cannot find task to stop for execution $executionID)")
        sender ! decorateMessage(Acknowledge.get())
      }

    case CancelTask(executionID) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        task.cancelExecution()
        sender ! decorateMessage(Acknowledge.get())
      } else {
        log.debug(s"Cannot find task to cancel for execution $executionID)")
        sender ! decorateMessage(Acknowledge.get())
      }

    case TriggerCheckpoint(jobId, taskExecutionId, checkpointId, timestamp, checkpointOptions) =>
      log.debug(s"Receiver TriggerCheckpoint $checkpointId@$timestamp " +
        s"for $taskExecutionId.")
      val task = runningTasks.get(taskExecutionId)
      if (task != null) {
        task.triggerCheckpointBarrier(checkpointId, timestamp, checkpointOptions)
      } else {
        log.debug(s"TaskManager received a checkpoint request " +
          s"for unknown task $taskExecutionId.")
      }

    case NotifyCheckpointComplete(jobId, taskExecutionId, checkpointId, timestamp) =>
      log.debug(s"Receiver ConfirmCheckpoint $checkpointId@$timestamp " +
        s"for $taskExecutionId.")
      val task = runningTasks.get(taskExecutionId)
      if (task != null) {
        task.notifyCheckpointComplete(checkpointId)
      } else {
        log.debug(s"TaskManager received a checkpoint confirmation " +
          s"for unknown task $taskExecutionId.")
      }
  }
}
2
// Slide 3: the same TaskManagerGateway/TaskManager excerpt as slide 2, followed
// by three more Flink actor classes. The CheckpointResponder and
// KvStateRegistryListener portions were mangled by the slide-to-text extraction
// (every "o"+following character became a line break) and have been
// reconstructed below.
// NOTE(review): the reconstruction matches Flink-1.3-era JobManager actor code;
// confirm against the original slides.

class TaskManagerGateway {

  def disconnectFromJobManager(instanceId: InstanceID, cause: Exception, mgr: ActorRef) = {
    mgr ! Disconnect(instanceId, cause)
  }

  def stopCluster(applicationStatus: ApplicationStatus, message: String, mgr: ActorRef) = {
    mgr ! StopCluster(applicationStatus, message)
  }

  def requestStackTrace(mgr: ActorRef) = {
    (mgr ? SendStackTrace).mapTo[StackTrace]
  }

  def submitTask(tdd: TaskDeploymentDescriptor, mgr: ActorRef) = {
    (mgr ? SubmitTask(tdd)).mapTo[Acknowledge]
  }

  def stopTask(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    (mgr ? StopTask(executionAttemptID)).mapTo[Acknowledge]
  }

  def cancelTask(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    // FIX: original read `(mgr ? CancelTask(id).mapTo[Acknowledge]` -- the ask
    // expression was never closed, so this did not parse; close it before mapTo.
    (mgr ? CancelTask(executionAttemptID)).mapTo[Acknowledge]
  }

  def updatePartitions(executionAttemptID: ExecutionAttemptID,
      partitionInfos: Iterable[PartitionInfo], mgr: ActorRef) = {
    (mgr ? UpdateTaskMultiplePartitionInfos(executionAttemptID, partitionInfos))
      .mapTo[Acknowledge]
  }

  def failPartition(executionAttemptID: ExecutionAttemptID, mgr: ActorRef) = {
    mgr ! FailIntermediateResultPartitions(executionAttemptID)
  }

  // FIX: `long` is Java syntax, not a Scala type -- use `Long`.
  def notifyCheckpointComplete(executionAttemptID: ExecutionAttemptID, jobId: JobID,
      checkpointId: Long, timestamp: Long, mgr: ActorRef) = {
    mgr ! NotifyCheckpointComplete(jobId, executionAttemptID, checkpointId, timestamp)
  }

  def triggerCheckpoint(executionAttemptID: ExecutionAttemptID, jobId: JobID,
      checkpointId: Long, timestamp: Long, checkpointOptions: CheckpointOptions,
      mgr: ActorRef) = {
    mgr ! TriggerCheckpoint(jobId, executionAttemptID, checkpointId, timestamp,
      checkpointOptions)
  }

  def requestTaskManagerLog(logTypeRequest: LogTypeRequest, mgr: ActorRef) = {
    (mgr ? RequestTaskManagerLog(logTypeRequest)).mapTo[BlobKey]
  }
}

// TaskManager side: the actor that receives and handles the messages the
// gateway above sends.
class TaskManager extends Actor {

  def receive = {
    case SendStackTrace =>
      sendStackTrace() foreach { message =>
        sender ! decorateMessage(message)
      }

    case Disconnect(instanceIdToDisconnect, cause) =>
      if (instanceIdToDisconnect.equals(instanceID)) {
        handleJobManagerDisconnect("JobManager requested disconnect: " + cause.getMessage())
        triggerTaskManagerRegistration()
      } else {
        log.debug("Received disconnect message for wrong instance id " +
          instanceIdToDisconnect)
      }

    case StopCluster(applicationStatus, message) =>
      log.info(s"Stopping TaskManager with final application status " +
        s"$applicationStatus and diagnostics: $message")
      shutdown()

    case RequestTaskManagerLog(requestType) =>
      blobService match {
        case Some(_) =>
          handleRequestTaskManagerLog(requestType, currentJobManager.get) match {
            case Left(message) => sender() ! message
            case Right(message) => sender() ! message
          }
        case None =>
          sender() ! akka.actor.Status.Failure(new IOException(
            "BlobService not available. Cannot upload TaskManager logs."))
      }

    case UpdateTaskMultiplePartitionInfos(executionID, partitionInfos) =>
      sender ! decorateMessage(updateTaskInputPartitions(executionID, partitionInfos))

    case FailIntermediateResultPartitions(executionID) =>
      log.info(s"Discarding the results produced by task execution $executionID")
      try {
        network.getResultPartitionManager.releasePartitionsProducedBy(executionID)
      } catch {
        case t: Throwable => killTaskManagerFatal(
          "Fatal leak: Unable to release intermediate result partition data", t)
      }

    case SubmitTask(tdd) =>
      sender ! decorateMessage(submitTask(tdd))

    case StopTask(executionID) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        try {
          task.stopExecution()
          sender ! decorateMessage(Acknowledge.get())
        } catch {
          case t: Throwable => sender ! decorateMessage(Status.Failure(t))
        }
      } else {
        log.debug(s"Cannot find task to stop for execution $executionID)")
        sender ! decorateMessage(Acknowledge.get())
      }

    case CancelTask(executionID) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        task.cancelExecution()
        sender ! decorateMessage(Acknowledge.get())
      } else {
        log.debug(s"Cannot find task to cancel for execution $executionID)")
        sender ! decorateMessage(Acknowledge.get())
      }

    case TriggerCheckpoint(jobId, taskExecutionId, checkpointId, timestamp, checkpointOptions) =>
      log.debug(s"Receiver TriggerCheckpoint $checkpointId@$timestamp " +
        s"for $taskExecutionId.")
      val task = runningTasks.get(taskExecutionId)
      if (task != null) {
        task.triggerCheckpointBarrier(checkpointId, timestamp, checkpointOptions)
      } else {
        log.debug(s"TaskManager received a checkpoint request " +
          s"for unknown task $taskExecutionId.")
      }

    case NotifyCheckpointComplete(jobId, taskExecutionId, checkpointId, timestamp) =>
      log.debug(s"Receiver ConfirmCheckpoint $checkpointId@$timestamp " +
        s"for $taskExecutionId.")
      val task = runningTasks.get(taskExecutionId)
      if (task != null) {
        task.notifyCheckpointComplete(checkpointId)
      } else {
        log.debug(s"TaskManager received a checkpoint confirmation " +
          s"for unknown task $taskExecutionId.")
      }
  }
}

// Local actor on the TaskManager that un-registers finished tasks, escalates
// fatal errors, and forwards task execution-state updates to the JobManager.
class TaskManagerActions extends Actor {

  def receive = {
    case TaskInFinalState(executionID) =>
      val task = runningTasks.remove(executionID)
      if (task != null) {
        // the task must be in a terminal state
        if (!task.getExecutionState.isTerminal) {
          try {
            task.failExternally(new Exception("Task is being removed from TaskManager"))
          } catch {
            case e: Exception => log.error("Could not properly fail task", e)
          }
        }
        log.info(s"Un-registering task and sending final execution state " +
          s"${task.getExecutionState} to JobManager for task " +
          s"${task.getTaskInfo.getTaskName} (${task.getExecutionId})")
        val accumulators = {
          val registry = task.getAccumulatorRegistry
          registry.getSnapshot
        }
        self ! decorateMessage(
          UpdateTaskExecutionState(
            new TaskExecutionState(
              task.getJobID,
              task.getExecutionId,
              task.getExecutionState,
              task.getFailureCause,
              accumulators,
              task.getMetricGroup.getIOMetricGroup.createSnapshot())
          )
        )
      } else {
        log.error(s"Cannot find task with ID $executionID to unregister.")
      }

    case FatalError(message, cause) =>
      log.error("\n" +
        "==============================================================\n" +
        "====================== FATAL =======================\n" +
        "==============================================================\n" +
        "\n" +
        "A fatal error occurred, forcing the TaskManager to shut down: " + message, cause)
      self ! Kill

    case FailTask(executionID, cause) =>
      val task = runningTasks.get(executionID)
      if (task != null) {
        task.failExternally(cause)
      } else {
        log.debug(s"Cannot find task to fail for execution $executionID)")
      }

    case UpdateTaskExecutionState(taskExecutionState: TaskExecutionState) =>
      // we receive these from our tasks and forward them to the JobManager
      currentJobManager foreach { jobManager => {
        val futureResponse = (jobManager ?
          decorateMessage(UpdateTaskExecutionState(taskExecutionState)))(askTimeout)
        val executionID = taskExecutionState.getID
        futureResponse.mapTo[Boolean].onComplete {
          // IMPORTANT: In the future callback, we cannot directly modify state
          // but only send messages to the TaskManager to do those changes
          case scala.util.Success(result) =>
            if (!result) {
              self ! decorateMessage(
                FailTask(
                  executionID,
                  new Exception("Task has been cancelled on the JobManager."))
              )
            }
          case scala.util.Failure(t) =>
            self ! decorateMessage(
              FailTask(
                executionID,
                new Exception(
                  "Failed to send ExecutionStateChange notification to JobManager", t))
            )
        }(context.dispatcher)
      } }
  }
}

// JobManager-side actor that routes checkpoint acknowledge/decline messages
// from TaskManagers to the job's CheckpointCoordinator.
class CheckpointResponder extends Actor {

  def receive = {
    case ackMessage: AcknowledgeCheckpoint =>
      val jid = ackMessage.getJob()
      currentJobs.get(jid) match {
        case Some((graph, _)) =>
          val checkpointCoordinator = graph.getCheckpointCoordinator()
          if (checkpointCoordinator != null) {
            // Handled off the actor thread: coordinator calls may block.
            future {
              try {
                if (!checkpointCoordinator.receiveAcknowledgeMessage(ackMessage)) {
                  log.info("Received message for non-existing checkpoint " +
                    ackMessage.getCheckpointId)
                }
              } catch {
                case t: Throwable =>
                  log.error(s"Error in CheckpointCoordinator while processing $ackMessage", t)
              }
            }(context.dispatcher)
          } else {
            log.error(s"Received AcknowledgeCheckpoint message for job $jid with no " +
              s"CheckpointCoordinator")
          }
        case None =>
          log.error(s"Received AcknowledgeCheckpoint for unavailable job $jid")
      }

    case declineMessage: DeclineCheckpoint =>
      val jid = declineMessage.getJob()
      currentJobs.get(jid) match {
        case Some((graph, _)) =>
          val checkpointCoordinator = graph.getCheckpointCoordinator()
          if (checkpointCoordinator != null) {
            future {
              try {
                checkpointCoordinator.receiveDeclineMessage(declineMessage)
              } catch {
                case t: Throwable =>
                  // FIX: original was missing the closing `)` after `, t`.
                  log.error(s"Error in CheckpointCoordinator while processing $declineMessage", t)
              }
            }(context.dispatcher)
          } else {
            log.error(s"Received DeclineCheckpoint message for job $jid with no " +
              s"CheckpointCoordinator")
          }
        case None =>
          log.error(s"Received DeclineCheckpoint for unavailable job $jid")
      }
  }
}

// JobManager-side actor that mirrors TaskManager KvState (un)registration
// notifications into the job's KvStateLocationRegistry.
class KvStateRegistryListener extends Actor {

  def receive = {
    case msg: NotifyKvStateRegistered =>
      currentJobs.get(msg.getJobId) match {
        case Some((graph, _)) =>
          try {
            log.debug(s"Key value state registered for job ${msg.getJobId} under " +
              s"name ${msg.getRegistrationName}.")
            graph.getKvStateLocationRegistry.notifyKvStateRegistered(
              msg.getJobVertexId,
              msg.getKeyGroupRange,
              msg.getRegistrationName,
              msg.getKvStateId,
              msg.getKvStateServerAddress)
          } catch {
            case t: Throwable =>
              log.error(s"Failed to notify KvStateRegistry about registration $msg.")
          }
        case None =>
          log.error(s"Received $msg for unavailable job.")
      }

    // TaskManager KvState unregistration
    case msg: NotifyKvStateUnregistered =>
      currentJobs.get(msg.getJobId) match {
        case Some((graph, _)) =>
          try {
            graph.getKvStateLocationRegistry.notifyKvStateUnregistered(
              msg.getJobVertexId,
              msg.getKeyGroupRange,
              msg.getRegistrationName)
          } catch {
            case t: Throwable =>
              log.error(s"Failed to notify KvStateRegistry about registration $msg.")
          }
        case None =>
          log.error(s"Received $msg for unavailable job.")
      }
  }
}
3
4
trait Registry extends Peer trait Node extends Peer
val message: Event[String] on Registry = placed { getMessageStream() }
5
Node Node Node Node Node Registry
trait Registry extends Peer { type Tie <: Multiple[Node] } trait Node extends Peer { type Tie <: Single[Registry] with Multiple[Node] }
6
trait Registry extends Peer { type Tie <: Single[Node] } trait Node extends Peer { type Tie <: Single[Registry] } val message: Event[String] on Node placed[Registry] { message.asLocal: Event[String] }
Node Registry
7
trait Registry extends Peer { type Tie <: Multiple[Node] } trait Node extends Peer { type Tie <: Single[Registry] } val message: Event[String] on Node placed[Registry] { message.asLocalFromAll: Map[Remote[Node], Event[String]] }
Node Node Node Registry
8
9
Node Registry message process message result
val message = Event[String]() val result = message map processMessage val ui = new UI(result)
10
val message: Event[String] on Node = placed[Node] { Event[String]() } val result = placed[Registry] { message.asLocal map processMessage } val ui = placed[Node] { new UI(result.asLocal) }
Node Registry message process message result
11
@multitier object Chat { trait Registry extends Peer { type Tie <: Multiple[Node] } trait Node extends Peer { type Tie <: Single[Registry] } val message = placed[Node] { Event[String]() } val publicMessage = placed[Registry] { message.asLocalFromAllSeq map { case (_, msg) => msg } } placed[Node].main { publicMessage.asLocal observe println for (line <- io.Source.stdin.getLines) message.fire(line) } } Registry Node
println
Node
println public Message message message
12
13
14
Master Proxy Master Worker
trait MasterProxy extends Peer { type Tie <: Multiple[Master] with Multiple[Worker] } trait Worker extends Peer { type Tie <: Single[MasterProxy] with Optional[Master] } trait Master extends Peer { type Tie <: Multiple[MasterProxy] with Multiple[Worker] }
15
Multiple modules CheckpointResponder KvStateRegistryListener PartitionProducerStateChecker ResultPartitionConsumableNotifier TaskManager TaskManagerActions Eliminated 23 non-exhaustive pattern matches and 8 type casts
16
Latency
4 6 8 Number of workers 1000 800 600 400 200 Latency [ms]
Flink ScalaLoci Flink
Cumulative Distribution
0.2 0.4 0.6 0.8 1.0 Fraction of tuples complete 1500 1000 500
Flink, 4 workers Flink, 6 workers Flink, 8 workers ScalaLoci Flink, 4 workers ScalaLoci Flink, 6 workers ScalaLoci Flink, 8 workers
17
18
19
Node Node Node Node Node Registry
Node Registry message process message result
Value on Peer
20