Clustering completed

This commit is contained in:
Rohan Sircar 2020-04-08 20:18:40 +05:30
parent a6a366b62c
commit a6e0377816
2 changed files with 323 additions and 36 deletions

View File

@ -2,6 +2,7 @@ import model.Coord
import model.Customer
import model.HHCEdge
import scala.collection.mutable._
import util.Util
object Main {
def main(args: Array[String]): Unit = {
@ -42,45 +43,38 @@ object Main {
// sample adjacency
// format: off
val edges = Array(
Array(0 , 9 , 75, 0 , 0),
Array(9 , 0 , 95, 19, 42),
Array(75, 95, 0 , 51, 66),
Array(0 , 19, 51, 0 , 31),
Array(0 , 42, 66, 31, 0)
val edgesSeq = Seq(
Seq(0 , 9 , 75, 0 , 0),
Seq(9 , 0 , 95, 19, 42),
Seq(75, 95, 0 , 51, 66),
Seq(0 , 19, 51, 0 , 31),
Seq(0 , 42, 66, 31, 0)
);
// adjacency list form
// 0 -> 1 -> 2
// 1 -> 0 -> 2 -> 3 -> 4
// 2 -> 0 -> 1 -> 3 -> 4
// 3 -> 1 -> 2 -> 4
// 4 -> 1 -> 2 -> 3
val edges = edgesSeq.map(e => {
e.toArray
}).toArray
val mst = Util.mstUsingPrims(edges)
// mst
// 0, 9 , 0 , 0 , 0
// 9, 0 , 0 , 19, 0
// 0, 0 , 0 , 51, 0
// 0, 19, 51, 0 , 31
// 0, 0 , 0 , 31, 0
// format: on
// Prim's algorithm
val selected: ArrayBuffer[Boolean] =
ArrayBuffer.fill(5)(false)
selected(0) = true
for (_ <- 0 until 4) {
var min = 999999
var x = 0
var y = 0
for (i <- 0 until 5) {
if (selected(i) == true) {
for (j <- 0 until 5) {
if (selected(j) == false && edges(i)(j) != 0) {
if (min > edges(i)(j)) {
min = edges(i)(j)
x = i
y = j
}
}
}
}
}
println(s"Edge selected $x - $y : ${edges(x)(y)}")
selected(y) = true
}
// Prim's algorithm result
// Edge selected 0 - 1: 9
// Edge selected 1 - 3: 19
@ -89,13 +83,12 @@ object Main {
// Verify the result with the one at https://www.programiz.com/dsa/prim-algorithm
val edges2: ArrayBuffer[ArrayBuffer[Double]] = ArrayBuffer.empty
val edges2: Array[Array[Double]] = Array.ofDim(5, 5)
// create adjacency matrix from given customers
for (i <- 0 to 4) {
edges2.append(ArrayBuffer.empty)
for (j <- 0 to 4) {
edges2(i).append(HHCEdge(V(i), V(j)).weight)
edges2(i)(j) = Util.getHaversineDistance(V(i).location, V(j).location)
}
}
@ -113,5 +106,78 @@ object Main {
// 563.55, 564.16, 478.40, 0.00 , 463.64
// 225.88, 357.08, 252.42, 463.64, 0.00
println()
println("Initial graph:")
edgesSeq.foreach { e =>
e.foreach { d =>
print(s"$d, ")
}
println()
}
println("MST: ")
mst.foreach { e =>
e.foreach { d =>
print(s"$d, ")
}
println()
}
// 0, 9, 0 , 0 , 0
// 0, 0, 0 , 19, 0
// 0, 0, 0 , 0 , 0
// 0, 0, 51, 0 , 31
// 0, 0, 0 , 0 , 0
val (centr, removed) = Util.findCentroids(mst)
// val (centr2, eds2) = Util.findCentroids(edges2)
println()
println(s"Centroids: \n$centr")
println(s"Removed: \n$removed")
val clust = Util.findClusters(mst, centr)
val adjList = Util.makeAdjacencyList(edges, centr)
println(s"Clusters:")
clust.foreach(c => {
val (e, d) = c
print(s"$e: ")
d.foreach(f => {
print(s"-> $f ")
})
println()
})
println()
val fnl = Util.groupClusters(centr, clust, removed)
println(s"Final cluster groups: \n$fnl")
// Output
//
// Initial graph:
// 0, 9, 75, 0, 0,
// 9, 0, 95, 19, 42,
// 75, 95, 0, 51, 66,
// 0, 19, 51, 0, 31,
// 0, 42, 66, 31, 0,
// MST:
// 0, 9, 0, 0, 0,
// 9, 0, 0, 19, 0,
// 0, 0, 0, 51, 0,
// 0, 19, 51, 0, 31,
// 0, 0, 0, 31, 0,
// Centroids:
// Vector(2, 3, 4)
// Removed:
// Vector((2,3,51), (3,2,51), (3,4,31), (4,3,31))
// Clusters:
// 2: -> 2
// 3: -> 3 -> 1 -> 0
// 4: -> 4
// Final cluster groups:
// Map(2 -> Vector((3,51)), 3 -> Vector((4,31), (2,51)), 4 -> Vector((3,31)))
}
}

View File

@ -2,6 +2,8 @@ package util
import model.Coord
import scala.math._
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable
object Util {
// Mean radius of the Earth in kilometres; the distance helper below multiplies by it.
// The previous value (6471) was off by 100 km, inflating every distance by ~1.6%.
private val r = 6371.00 // km
@ -44,4 +46,223 @@ object Util {
val d = r * c
d
}
/** Prim-style sweep over an adjacency matrix that delegates edge handling to callbacks.
  *
  * Vertex 0 is marked as selected first. Then, `arr1.length - 1` times, every
  * non-zero entry `arr1(i)(j)` joining a selected vertex `i` to an unselected
  * vertex `j` is offered to `cond(comp, arr1(i)(j))`; for each accepted entry
  * `cb(i, j, arr1, scratch)` is invoked, where `scratch` is a zeroed n x n matrix
  * private to this call. After each sweep the `j` of the last accepted entry
  * (or vertex 0 when nothing was accepted) is marked as selected.
  *
  * The value returned by `cb` is ignored and the scratch matrix is not returned,
  * so the only observable effects are those performed by `cond` and `cb`.
  *
  * @param arr1 square adjacency matrix; 0 means "no edge"
  * @param comp value passed as the first argument of `cond`
  * @param cond predicate deciding whether an edge weight is accepted
  * @param cb   callback invoked for every accepted edge
  */
def primTraverse(
    arr1: Array[Array[Int]],
    comp: Int,
    cond: (Int, Int) => Boolean,
    cb: (Int, Int, Array[Array[Int]], Array[Array[Int]]) => Array[Array[Int]]
): Unit = {
  val size = arr1.length
  val inTree = Array.fill(size)(false)
  val scratch: Array[Array[Int]] = Array.ofDim(size, size)
  inTree(0) = true

  (1 until size).foreach { _ =>
    var lastRow = 0
    var lastCol = 0
    for {
      row <- 0 until size
      if inTree(row)
      col <- 0 until size
      if !inTree(col) && arr1(row)(col) != 0
      if cond(comp, arr1(row)(col))
    } {
      lastRow = row
      lastCol = col
      cb(lastRow, lastCol, arr1, scratch)
    }
    inTree(lastCol) = true
  }
}
/** Builds a minimum spanning tree with Prim's algorithm, starting from vertex 0.
  *
  * @param edges square, symmetric adjacency matrix; an entry of 0 means "no edge"
  * @return a new n x n matrix holding only the chosen edges, written symmetrically
  *         (`mst(x)(y)` and `mst(y)(x)`). An empty input yields an empty matrix. If
  *         the graph is disconnected, only the component containing vertex 0 is spanned.
  */
def mstUsingPrims(
    edges: Array[Array[Int]]
): Array[Array[Int]] = {
  val n = edges.length
  val mst: Array[Array[Int]] = Array.ofDim(n, n)
  if (n > 0) {
    val selected = Array.fill(n)(false)
    selected(0) = true
    for (_ <- 0 until n - 1) {
      // Cheapest edge leaving the tree so far. Tracking it as an Option avoids a
      // magic "infinity" weight, so arbitrarily heavy edges are still eligible.
      var best: Option[(Int, Int)] = None
      for {
        i <- 0 until n if selected(i)
        j <- 0 until n if !selected(j) && edges(i)(j) != 0
      } {
        // Strict comparison keeps the first minimum found in (i, j) scan order.
        if (best.forall { case (x, y) => edges(i)(j) < edges(x)(y) }) {
          best = Some((i, j))
        }
      }
      // No candidate means the remaining vertices are unreachable: write nothing.
      best.foreach { case (x, y) =>
        mst(x)(y) = edges(x)(y)
        mst(y)(x) = edges(x)(y)
        selected(y) = true
      }
    }
  }
  mst
}
/** Collects, for every centroid, the vertices reachable from it in `mst`.
  *
  * Each centroid is used as the start of a depth-first traversal; the resulting
  * visit order is stored under the first vertex of that traversal.
  *
  * @param mst       adjacency matrix to traverse (positive entries are edges)
  * @param centroids vertices to start a traversal from
  * @return map from traversal start vertex to the vertices visited, in visit order
  */
def findClusters(
    mst: Array[Array[Int]],
    centroids: IndexedSeq[Int]
): Map[Int, ArrayBuffer[Int]] = {
  val pairs = for (centroid <- centroids) yield {
    val reached = DFS(centroid, mst)
    reached.head -> reached
  }
  pairs.toMap
}
/** Converts an adjacency matrix into an adjacency list.
  *
  * @param mst       square adjacency matrix; any non-zero entry counts as an edge
  * @param centroids not used by this method
  * @return one buffer per row `i`, listing in ascending order every column `j`
  *         for which `mst(i)(j)` is non-zero
  */
def makeAdjacencyList(
    mst: Array[Array[Int]],
    centroids: IndexedSeq[Int]
): ArrayBuffer[ArrayBuffer[Int]] = {
  val size = mst.length
  val columns = 0 until size
  ArrayBuffer.tabulate(size) { row =>
    val neighbours = ArrayBuffer.empty[Int]
    columns.foreach { col =>
      if (mst(row)(col) != 0) neighbours += col
    }
    neighbours
  }
}
/** Cuts every edge heavier than `threshold` out of `mst` and reports the cut.
  *
  * NOTE: `mst` is modified in place: each entry that exceeds the threshold is
  * overwritten with zero. Every matrix cell is inspected, so in a symmetric matrix
  * both `(i, j, w)` and `(j, i, w)` are reported.
  *
  * @param mst       square adjacency matrix; zero means "no edge"
  * @param threshold weights strictly greater than this are removed (defaults to 20)
  * @return the endpoints of all removed edges in ascending order, and the removed
  *         entries as `(row, column, weight)` in row-major scan order
  */
def findCentroids[T](
    mst: Array[Array[T]],
    threshold: Int = 20
)(implicit ev: Numeric[T]): (IndexedSeq[Int], IndexedSeq[(Int, Int, T)]) = {
  val n = mst.length
  val limit = ev.fromInt(threshold)
  val centroids: mutable.Set[Int] = mutable.Set.empty
  val removed: ArrayBuffer[(Int, Int, T)] = ArrayBuffer.empty
  for (i <- 0 until n; j <- 0 until n) {
    val weight = mst(i)(j)
    // Compare against the typed zero so that "no edge" cells are never cut,
    // even when a negative threshold is supplied.
    if (ev.gt(weight, limit) && !ev.equiv(weight, ev.zero)) {
      centroids += i
      centroids += j
      removed += ((i, j, weight))
      mst(i)(j) = ev.zero
    }
  }
  // Sorted so the result does not depend on the hash set's iteration order.
  (centroids.toIndexedSeq.sorted, removed.toIndexedSeq)
}
// def DFS(start: Int, graph: Array[Array[Int]], visited: Array[Boolean]): Unit = {
// // if(start == 0) {
// // visited = Array.fill(graph.size)(false)
// // }
// visited(start) = true
// println(s"$start ")
// for(i <- 0 until graph.size) {
// if (graph(start)(i) > 0 && graph(start)(i) < 20 && (!visited(i))) {
// DFS(i, graph, visited);
// }
// }
// }
// val visited = Array.fill(mst.size)(false)
/** Depth-first traversal of an adjacency matrix.
  *
  * @param start vertex to begin from
  * @param graph square adjacency matrix; an entry greater than 0 is an edge
  * @return the vertices reachable from `start` in pre-order (so `start` comes
  *         first), with neighbours explored in ascending index order
  */
def DFS(
    start: Int,
    graph: Array[Array[Int]]
): ArrayBuffer[Int] = {
  val seen = Array.fill(graph.size)(false)
  val order = ArrayBuffer.empty[Int]

  def visit(node: Int): Unit = {
    seen(node) = true
    order += node
    graph.indices.foreach { next =>
      if (graph(node)(next) > 0 && !seen(next)) visit(next)
    }
  }

  visit(start)
  order
}
/** Depth-first traversal that only follows edges accepted by a predicate.
  *
  * An edge from `u` to `v` is followed when `graph(u)(v) > 0`, when
  * `cond(graph(u)(v), num)` holds, and when `v` has not been visited yet.
  *
  * @param start vertex to begin from
  * @param graph square adjacency matrix; an entry greater than 0 is an edge
  * @param num   value passed as the second argument of `cond`
  * @param cond  predicate applied to `(edge weight, num)`
  * @return the visited vertices in pre-order (so `start` comes first), with
  *         neighbours explored in ascending index order
  */
def DFS(
    start: Int,
    graph: Array[Array[Int]],
    num: Int,
    cond: (Int, Int) => Boolean
): ArrayBuffer[Int] = {
  val seen = Array.fill(graph.size)(false)
  val order = ArrayBuffer.empty[Int]

  def visit(node: Int): Unit = {
    seen(node) = true
    order += node
    graph.indices.foreach { next =>
      val weight = graph(node)(next)
      if (weight > 0 && cond(weight, num) && !seen(next)) visit(next)
    }
  }

  visit(start)
  order
}
/** Lists, for every centroid, the removed edges that start at it, cheapest first.
  *
  * @param centroids vertices to build an entry for
  * @param clusters  kept for interface compatibility; not consulted, so a centroid
  *                  without a cluster entry no longer raises `NoSuchElementException`
  * @param removed   removed edges as `(from, to, weight)`
  * @return map from each centroid to the `(to, weight)` pairs of the removed edges
  *         whose `from` is that centroid, ordered by ascending weight (ties keep
  *         their order in `removed`); centroids with no such edge map to an empty
  *         sequence
  */
def groupClusters(
    centroids: IndexedSeq[Int],
    clusters: Map[Int, ArrayBuffer[Int]],
    removed: IndexedSeq[(Int, Int, Int)]
): Map[Int, IndexedSeq[(Int, Int)]] = {
  centroids.map { centroid =>
    val neighbours = removed
      .filter { case (from, _, _) => from == centroid }
      .sortBy { case (_, _, weight) => weight }
      .map { case (_, to, weight) => (to, weight) }
    centroid -> neighbours
  }.toMap
}
}