// Zeppelin notebook cell: run DBSCAN clustering over a CSV dataset on HDFS.
// NOTE(review): `sc` (SparkContext) is provided by the Zeppelin environment — not defined here.

// Input: 1M-row CSV on a local HDFS namenode; presumably each row is a point — verify format against IOHelper.readDataset.
val data_path2 = "hdfs://127.0.0.1:9000/data/9_1M.csv"
val dataset2 = IOHelper.readDataset(sc, data_path2)
// epsilon = neighborhood radius, minPts = 4; border points are treated as noise rather than assigned to clusters.
val settings = new DbscanSettings ().withEpsilon (0.8).withNumberOfPoints (4).withTreatBorderPointsAsNoise(true)
// Runs the distributed DBSCAN job; result holds the per-point cluster assignments.
val clusteringResult = Dbscan.train (dataset2, settings)
// sbt build definition for the spark_dbscan project.
name := "spark_dbscan"
organization := "org.alitouka"
version := "0.0.4"
scalaVersion := "2.11.7"

// Use %% so the Scala binary-version suffix (_2.11) is derived from scalaVersion
// instead of being hard-coded in each artifact name — prevents drift on upgrade.
// Spark is "provided": supplied by the cluster at runtime, excluded from the assembly.
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.2.0" % "provided"
libraryDependencies += "org.scalatest" %% "scalatest" % "2.1.3" % "test"
// commons-math3 is a pure-Java artifact, so it keeps a plain % (no Scala suffix).
libraryDependencies += "org.apache.commons" % "commons-math3" % "3.2"
// https://mvnrepository.com/artifact/com.github.scopt/scopt_2.11
libraryDependencies += "com.github.scopt" %% "scopt" % "3.7.0"
Zeppelin notebook export (JSON): https://gist.github.com/0f067d6ff2239500ca8eed7d38b5872b
Built on commit d3b085286ccb16b146e7bb5234765cbc23e11c66
Error log: https://gist.github.com/ttpro1995/7437b1f3b1f944fd26daf2ef4ba73efe
build.sbt