From a6e037781662892b11b4bd497fe45f696e76c12e Mon Sep 17 00:00:00 2001 From: Rohan Sircar Date: Wed, 8 Apr 2020 20:18:40 +0530 Subject: [PATCH] Clustering completed --- src/main/scala/Main.scala | 138 ++++++++++++++------ src/main/scala/util/Util.scala | 221 +++++++++++++++++++++++++++++++++ 2 files changed, 323 insertions(+), 36 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 4310162..42e9ab9 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -2,6 +2,7 @@ import model.Coord import model.Customer import model.HHCEdge import scala.collection.mutable._ +import util.Util object Main { def main(args: Array[String]): Unit = { @@ -42,45 +43,38 @@ object Main { // sample adjacency // format: off - val edges = Array( - Array(0 , 9 , 75, 0 , 0), - Array(9 , 0 , 95, 19, 42), - Array(75, 95, 0 , 51, 66), - Array(0 , 19, 51, 0 , 31), - Array(0 , 42, 66, 31, 0) + val edgesSeq = Seq( + Seq(0 , 9 , 75, 0 , 0), + Seq(9 , 0 , 95, 19, 42), + Seq(75, 95, 0 , 51, 66), + Seq(0 , 19, 51, 0 , 31), + Seq(0 , 42, 66, 31, 0) ); + // adjacency list form + // 0 -> 1 -> 2 + // 1 -> 0 -> 2 -> 3 -> 4 + // 2 -> 0 -> 1 -> 3 -> 4 + // 3 -> 1 -> 2 -> 4 + // 4 -> 1 -> 2 -> 3 + + val edges = edgesSeq.map(e => { + e.toArray + }).toArray + + val mst = Util.mstUsingPrims(edges) + + // mst + // 0, 9 , 0 , 0 , 0 + // 9, 0 , 0 , 19, 0 + // 0, 0 , 0 , 51, 0 + // 0, 19, 51, 0 , 31 + // 0, 0 , 0 , 31, 0 + // format: on // Prim's algorithm - val selected: ArrayBuffer[Boolean] = - ArrayBuffer.fill(5)(false) - - selected(0) = true - - - for (_ <- 0 until 4) { - var min = 999999 - var x = 0 - var y = 0 - for (i <- 0 until 5) { - if (selected(i) == true) { - for (j <- 0 until 5) { - if (selected(j) == false && edges(i)(j) != 0) { - if (min > edges(i)(j)) { - min = edges(i)(j) - x = i - y = j - } - } - } - } - } - println(s"Edge selected $x - $y : ${edges(x)(y)}") - selected(y) = true - } - // Prim's algorithm result // Edge selected 0 - 1: 9 // Edge selected 1 - 3: 19 @@ -89,13 +83,12 @@ object Main { // Verify the result with the one at https://www.programiz.com/dsa/prim-algorithm - val edges2: ArrayBuffer[ArrayBuffer[Double]] = ArrayBuffer.empty + val edges2: Array[Array[Double]] = Array.ofDim(5, 5) // create adjacency matrix from given customers for (i <- 0 to 4) { - edges2.append(ArrayBuffer.empty) for (j <- 0 to 4) { - edges2(i).append(HHCEdge(V(i), V(j)).weight) + edges2(i)(j) = Util.getHaversineDistance(V(i).location, V(j).location) } } @@ -113,5 +106,78 @@ object Main { // 563.55, 564.16, 478.40, 0.00 , 463.64 // 225.88, 357.08, 252.42, 463.64, 0.00 + println() + println("Initial graph:") + edgesSeq.foreach { e => + e.foreach { d => + print(s"$d, ") + } + println() + } + println("MST: ") + mst.foreach { e => + e.foreach { d => + print(s"$d, ") + } + println() + } + + // 0, 9, 0 , 0 , 0 + // 0, 0, 0 , 19, 0 + // 0, 0, 0 , 0 , 0 + // 0, 0, 51, 0 , 31 + // 0, 0, 0 , 0 , 0 + + val (centr, removed) = Util.findCentroids(mst) + // val (centr2, eds2) = Util.findCentroids(edges2) + + println() + println(s"Centroids: \n$centr") + println(s"Removed: \n$removed") + + val clust = Util.findClusters(mst, centr) + val adjList = Util.makeAdjacencyList(edges, centr) + + println(s"Clusters:") + clust.foreach(c => { + val (e, d) = c + print(s"$e: ") + d.foreach(f => { + print(s"-> $f ") + }) + println() + }) + println() + + val fnl = Util.groupClusters(centr, clust, removed) + + println(s"Final cluster groups: \n$fnl") + + // Output + // + // Initial graph: + // 0, 9, 75, 0, 0, + // 9, 0, 95, 19, 42, + // 75, 95, 0, 51, 66, + // 0, 19, 51, 0, 31, + // 0, 42, 66, 31, 0, + // MST: + // 0, 9, 0, 0, 0, + // 9, 0, 0, 19, 0, + // 0, 0, 0, 51, 0, + // 0, 19, 51, 0, 31, + // 0, 0, 0, 31, 0, + + // Centroids: + // Vector(2, 3, 4) + // Removed: + // Vector((2,3,51), (3,2,51), (3,4,31), (4,3,31)) + // Clusters: + // 2: -> 2 + // 3: -> 3 -> 1 -> 0 + // 4: -> 4 + + // Final cluster groups: + // Map(2 -> Vector((3,51)), 3 -> Vector((4,31), (2,51)), 4 -> Vector((3,31))) } } diff --git a/src/main/scala/util/Util.scala b/src/main/scala/util/Util.scala index 63e7eb7..99ebb6b 100644 --- a/src/main/scala/util/Util.scala +++ b/src/main/scala/util/Util.scala @@ -2,6 +2,8 @@ package util import model.Coord import scala.math._ +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable object Util { private val r = 6471.00 // km @@ -44,4 +46,223 @@ object Util { val d = r * c d } + + def primTraverse( + arr1: Array[Array[Int]], + comp: Int, + cond: (Int, Int) => Boolean, + cb: (Int, Int, Array[Array[Int]], Array[Array[Int]]) => Array[Array[Int]] + ): Unit = { + val n = arr1.length + val selected = Array.ofDim[Boolean](n) + + val arr2: Array[Array[Int]] = Array.ofDim(n, n) + + selected(0) = true + + // val mst: Array[Array[Int]] = Array.ofDim(5, 5) + + for (_ <- 0 until n - 1) { + // var min = 999999 + var x = 0 + var y = 0 + for (i <- 0 until n) { + if (selected(i) == true) { + for (j <- 0 until n) { + if (selected(j) == false && arr1(i)(j) != 0) { + if (cond(comp, arr1(i)(j))) { + x = i + y = j + cb(x, y, arr1, arr2) + } + } + } + + } + + } + // mst(x)(y) = mst(x)(y) + selected(y) = true + } + } + def mstUsingPrims( + edges: Array[Array[Int]] + ): Array[Array[Int]] = { + val n = edges.length + val selected: ArrayBuffer[Boolean] = ArrayBuffer.fill(n)(false) + + selected(0) = true + + val mst: Array[Array[Int]] = Array.ofDim(n, n) + + + for (_ <- 0 until n - 1) { + var min = 999999 + var x = 0 + var y = 0 + for (i <- 0 until n) { + if (selected(i) == true) { + for (j <- 0 until n) { + if (selected(j) == false && edges(i)(j) != 0) { + if (min > edges(i)(j)) { + min = edges(i)(j) + x = i + y = j + } + } + } + } + } + // println(s"Edge selected $x - $y : ${edges(x)(y)}") + mst(x)(y) = edges(x)(y) + mst(y)(x) = edges(x)(y) + + selected(y) = true + } + mst + } + + def findClusters( + mst: Array[Array[Int]], + centroids: IndexedSeq[Int] + ): Map[Int, ArrayBuffer[Int]] = { + val n = mst.length + val x: Map[Int, ArrayBuffer[Int]] = centroids + .map(d => { + val y = DFS(d, mst) + y(0) -> y + }) + .toMap + x + } + + def makeAdjacencyList( + mst: Array[Array[Int]], + centroids: IndexedSeq[Int] + ): ArrayBuffer[ArrayBuffer[Int]] = { + val n = mst.length + // val selected: ArrayBuffer[Boolean] = ArrayBuffer.fill(n)(false) + val buf: ArrayBuffer[ArrayBuffer[Int]] = + ArrayBuffer.fill(n)(ArrayBuffer.empty) + // for (_ <- 0 until n -1) { + + // } + for (i <- 0 until n) { + for (j <- 0 until n) { + if (mst(i)(j) != 0) { + // println(s" $i $j = ${mst(i)(j)}") + buf(i) += j + } + } + } + buf + } + + def findCentroids[T]( + mst: Array[Array[T]] + )(implicit ev: Numeric[T]): (IndexedSeq[Int], IndexedSeq[(Int, Int, T)]) = { + val n = mst.length + val centroids: mutable.Set[Int] = mutable.Set.empty + val removed: ArrayBuffer[(Int, Int, T)] = ArrayBuffer.empty + for (i <- 0 until n) { + for (j <- 0 until n) { + if (ev.gt(mst(i)(j), ev.fromInt(20)) && mst(i)(j) != 0) { + // println(s" $i $j = ${mst(i)(j)}") + centroids += i + centroids += j + removed.append((i, j, mst(i)(j))) + mst(i)(j) = ev.zero + } + } + } + (centroids.toIndexedSeq, removed.toIndexedSeq) + } + + // def DFS(start: Int, graph: Array[Array[Int]], visited: Array[Boolean]): Unit = { + // // if(start == 0) { + // // visited = Array.fill(graph.size)(false) + // // } + // visited(start) = true + + // println(s"$start ") + + // for(i <- 0 until graph.size) { + // if (graph(start)(i) > 0 && graph(start)(i) < 20 && (!visited(i))) { + // DFS(i, graph, visited); + // } + // } + // } + // val visited = Array.fill(mst.size)(false) + def DFS( + start: Int, + graph: Array[Array[Int]] + ): ArrayBuffer[Int] = { + val visited = Array.fill(graph.size)(false) + val buf = ArrayBuffer[Int]() + def loop(start: Int, graph: Array[Array[Int]], visited: Array[Boolean]) { + visited(start) = true + buf += start + // print(s"$start ") + + for (i <- 0 until graph.size) { + if (graph(start)(i) > 0 && (!visited(i))) { + loop(i, graph, visited); + } + } + } + + loop(start, graph, visited) + // println() + buf + } + + def DFS( + start: Int, + graph: Array[Array[Int]], + num: Int, + cond: (Int, Int) => Boolean + ): ArrayBuffer[Int] = { + val visited = Array.fill(graph.size)(false) + val buf = ArrayBuffer[Int]() + def loop(start: Int, graph: Array[Array[Int]], visited: Array[Boolean]) { + visited(start) = true + buf += start + // print(s"$start ") + + for (i <- 0 until graph.size) { + if (graph(start)(i) > 0 && cond(graph(start)(i), num) && (!visited(i))) { + loop(i, graph, visited); + } + } + } + + loop(start, graph, visited) + // println() + buf + } + + def groupClusters( + centroids: IndexedSeq[Int], + clusters: Map[Int, ArrayBuffer[Int]], + removed: IndexedSeq[(Int, Int, Int)] + ) = { + val groups = centroids + .map(c => { + val cluster = clusters(c) + val lst = removed + .filter(r => { + c == r._1 + }) + .sortWith((x, y) => { + x._3 < y._3 + }) + .map(l => { + (l._2,l._3) + }) + c -> lst + }) + .toMap + groups + } + }