// InsertCidList.scala
package com.piki_ds.ver1

import com.piki_ds.utils.hbase.HbaseInserter
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext

/**
 * Reads the stored `(Int, String)` pairs of every "single" model from object
 * files and pushes them into that model's table through [[HbaseInserter]].
 *
 * The whole load runs in this object's initializer: the first reference to
 * `InsertCidList` triggers it.
 *
 * Created by jungwon on 5/12/16.
 */
object InsertCidList {

  var sc: SparkContext = SparkContext.getOrCreate()
  var sqlContext: SQLContext = SQLContext.getOrCreate(sc)

  /** Model names, keyed by the family they belong to. */
  val modelName: Map[String, Seq[String]] = Map(
    "uniform" -> Seq("quality"),
    "single" -> Seq("cf", "topic", "age", "sex", "w2v"),
    "ensemble" -> Seq("ensemble1", "ensemble2", "ensemble3", "ensemble4")
  )

  // Only the "single" family is loaded here.
  for (model <- modelName("single")) {
    // Date component of the input path (hard-coded).
    val doi = "20160508"
    val pairs = sc.objectFile[(Int, String)](s"/user/joanne/clols/$doi/$model")

    // Bucket the records by the first tuple element modulo 1000; every bucket
    // is handed to the inserter as a single array.
    pairs
      .groupBy { case (id, _) => id % 1000 }
      .foreach { case (_, bucket) =>
        val tableName = s"uuid-cidlist_$model"
        val rows = bucket.map { case (id, value) => (id.toString, value) }.toArray
        // A fresh inserter is created for each bucket.
        val inserter = new HbaseInserter(tableName)
        inserter.insert(rows)
      }
  }

}