Commit 3f5548ee2360aa9042266fc3528d2f4a64980d7e
1 parent
8230469489
Exists in
master
temp change for insertcidList
Showing 4 changed files with 51 additions and 4 deletions Side-by-side Diff
app/com/piki_ds/preprocess/CidValidation.scala
View file @
3f5548e
... | ... | @@ -2,7 +2,7 @@ |
2 | 2 | |
3 | 3 | import org.apache.spark.sql.SQLContext |
4 | 4 | |
5 | -import com.piki_ds.utils.GetTextFile.getDashDump | |
5 | +import com.piki_ds.utils.GetTextFile.getDBDump | |
6 | 6 | |
7 | 7 | |
8 | 8 | /** |
... | ... | @@ -16,7 +16,7 @@ |
16 | 16 | val whereStr = s"udate is not null and title is not null and" + |
17 | 17 | s" contents_type in ('ALBUM', 'ALBUM.A', 'CHST', 'CHST.A','TOON','TOON.A') and " + |
18 | 18 | s"status in (${filterStatus.map(x=>s"'$x'").mkString(",")})" |
19 | - val mgc = getDashDump(sQLContext,"MG_CONTENTS").where(whereStr) | |
19 | + val mgc = getDBDump(sQLContext,"MG_CONTENTS").where(whereStr) | |
20 | 20 | val mgContents = mgc.select(mgc("contents_id"),mgc("status"), unix_timestamp(mgc("udate"))) |
21 | 21 | mgContents.map(x=>{ |
22 | 22 | val ts = x.getAs[Long]("unixtimestamp(udate,yyyy-MM-dd HH:mm:ss)") |
app/com/piki_ds/utils/GetTextFile.scala
View file @
3f5548e
app/com/piki_ds/ver1/InsertCidList.scala
View file @
3f5548e
1 | +package com.piki_ds.ver1 | |
2 | + | |
3 | +import com.piki_ds.utils.hbase.HbaseInserter | |
4 | +import org.apache.spark.SparkContext | |
5 | +import org.apache.spark.sql.SQLContext | |
6 | + | |
7 | +/** | |
8 | + * Created by jungwon on 5/12/16. | |
9 | + */ | |
10 | + | |
11 | +/** | |
12 | + * Reads precomputed `(Int, String)` records for each "single" model from object | |
13 | + * files and writes them to the HBase table `uuid-cidlist_<model>`. | |
14 | + * | |
15 | + * NOTE(review): records are presumably uuid -> cid list, judging by the table | |
16 | + * name only; confirm against the job that produces the object files. | |
17 | + */ | |
18 | +object InsertCidList { | |
19 | + | |
20 | +  // Contexts are obtained via getOrCreate rather than constructed here. | |
21 | +  var sc: SparkContext = SparkContext.getOrCreate() | |
22 | +  var sqlContext: SQLContext = SQLContext.getOrCreate(sc) | |
23 | + | |
24 | +  // Model names keyed by family; only the "single" family is used below. | |
25 | +  val modelName: Map[String, Seq[String]] = Map( | |
26 | +    "uniform" -> Seq("quality"), | |
27 | +    "single" -> Seq("cf", "topic", "age", "sex", "w2v"), | |
28 | +    "ensemble" -> Seq("ensemble1", "ensemble2", "ensemble3", "ensemble4") | |
29 | +  ) | |
30 | + | |
31 | +  // Top-level statement: the inserts run when this object is first initialized. | |
32 | +  for (model <- modelName("single")) { | |
33 | +    val doi = "20160508" | |
34 | +    val sourcePath = s"/user/joanne/clols/$doi/$model" | |
35 | +    val records = sc.objectFile[(Int, String)](sourcePath) | |
36 | +    // Bucket by key modulo 1000; each bucket becomes one insert call. | |
37 | +    val buckets = records.groupBy { case (key, _) => key % 1000 } | |
38 | +    buckets.foreach { case (_, rows) => | |
39 | +      val tableName = s"uuid-cidlist_$model" | |
40 | +      // Keys are stringified before being handed to HbaseInserter. | |
41 | +      val insertArray = rows.map { case (key, value) => (key.toString, value) }.toArray | |
42 | +      val inserter = new HbaseInserter(tableName) | |
43 | +      inserter.insert(insertArray) | |
44 | +    } | |
45 | +  } | |
46 | + | |
47 | +} |
app/com/piki_ds/ver1/QualityScore.scala
View file @
3f5548e
... | ... | @@ -75,7 +75,7 @@ |
75 | 75 | val param = Map("content"->0.5D,"comment"-> 0.30D,"editor"->0.20D) |
76 | 76 | |
77 | 77 | val finalScore: RDD[(Int, Long)] = combineScores(content,comment,editor,param).map(x=>(x._1.toInt,x._2.toLong)).filter(_._1 != 0) |
78 | - | |
78 | + scoreSave(doi,"quality","",finalScore.map(x=>(x._1.toString,x._2)),1) | |
79 | 79 | val insertArray = finalScore.map(x=>(x._1.toString, x._2.toString)).collect() |
80 | 80 | val test = new HbaseInserter("cid_quality") |
81 | 81 | test.insert(insertArray) |