Commit d00b05c6f35fdc3e116683546c34acf98057b6ce

Authored by Joanne
1 parent 5ee0858f89
Exists in master

editor score range

Showing 1 changed file with 20 additions and 12 deletions Side-by-side Diff

app/com/piki_ds/ver1/EditorScore.scala View file @ d00b05c
... ... @@ -21,7 +21,7 @@
21 21  
22 22 object EditorScore {
23 23  
24   - def getSparkConf= {
  24 + def getSparkConf1= {
25 25 val conf = new SparkConf().setAppName("EditorScore")
26 26 conf.setMaster("local[3]")
27 27 conf.set("master", "local[3]")
28 28  
... ... @@ -29,17 +29,20 @@
29 29 conf.set("spark.driver.allowMultipleContexts", "true")
30 30 }
31 31  
32   - val sc = new SparkContext(getSparkConf)
33   - val sqlContext = SQLContext.getOrCreate(sc)
34   - val hadoopConf = sc.hadoopConfiguration
35   - val fs = org.apache.hadoop.fs.FileSystem.get(hadoopConf)
  32 + def getSparkConf2= {
  33 + val conf = new SparkConf().setAppName("EditorScore")
  34 + conf.setMaster("local[3]")
  35 + conf.set("master", "local[3]")
  36 + conf.set("spark.app.name", "EditorScore")
  37 + conf.set("spark.driver.allowMultipleContexts", "t")
  38 + }
36 39  
37 40 def recentlyUpdatedPath(path:String , isParted:Boolean = true, hdfs:FileSystem): FileStatus = {
38 41 val list = hdfs.listStatus(new Path(path))
39 42 list.filter(x=>x.isDirectory && (!isParted || (isParted && hdfs.exists(x.getPath.suffix("/_SUCCESS"))))).maxBy(x=>x.getModificationTime)
40 43 }
41 44  
42   - def followGetter(sQLContext: SQLContext, dateKey:String, fileSave:Boolean = true) = {
  45 + def followGetter(sQLContext: SQLContext, dateKey:String, fs:FileSystem, fileSave:Boolean = true) = {
43 46 val fromExisting = sQLContext.read.format("json").load(recentlyUpdatedPath("/preprocess/followInfo",false,fs).getPath.toString)
44 47 val fromUpdated = getDashTable(sQLContext, "EDITOR_FOLLOW", dateKey)
45 48 val unionFrom = fromExisting.unionAll(fromUpdated)
... ... @@ -58,7 +61,7 @@
58 61 follow_info
59 62 }
60 63  
61   - def editorDB(sQLContext: SQLContext, dateKey:String): RDD[(String, String, Long, Long)] = {
  64 + def editorDB(sQLContext: SQLContext, fs:FileSystem, dateKey:String): RDD[(String, String, Long, Long)] = {
62 65 // DB에서 USER table 파싱해오기, 에디터 가져오기 (필수적인 단계아님....-_-)
63 66 val levels = Array("ADMIN_O","EDITOR_O","PARTNER_O", "PRESS_O","STAFF_O")
64 67 val filterS_user = s"where level in (${levels.map(x=>"'" + x+ "'").mkString(",")})"
65 68  
... ... @@ -67,9 +70,9 @@
67 70 val user_info: RDD[(String, String)] = user_tableGet.map(x=>(x.getAs[Long]("uid").toString, x.getAs[String]("name")))
68 71  
69 72 // DB에서 FOLLOW table 파싱해오기, 팔로워 수 가져오기
70   - val follow_info: RDD[(String, Long)] = followGetter(sQLContext, dateKey)
  73 + val follow_info: RDD[(String, Long)] = followGetter(sQLContext, dateKey,fs)
71 74  
72   - val joinedFollowInfo: RDD[(String, (String, Long))] = user_info.leftOuterJoin(follow_info).map(x=>(x._1,(x._2._1,x._2._2.getOrElse(10L))))
  75 + val joinedFollowInfo: RDD[(String, (String, Long))] = user_info.fullOuterJoin(follow_info).map(x=>(x._1,(x._2._1.getOrElse(""),x._2._2.getOrElse(10L))))
73 76  
74 77 // DB에서 MG_CONTENTS table 파싱해오기, 에디터 debut date 가져오기
75 78 val filterS_mgcont = " where uid is not null group by uid"
... ... @@ -80,8 +83,8 @@
80 83 val debut_info: RDD[(String, Long)] = mgcont_table.map(x=>(x.getAs[Long]("uid").toString, x.getAs[Long]("unixtimestamp(min(udate),yyyy-MM-dd HH:mm:ss)")))
81 84  
82 85 // uid, name, follow, debut
83   - val rawOut: RDD[(String, String, Long, Long)] = joinedFollowInfo.leftOuterJoin(debut_info).map(x=>{
84   - (x._1,(x._2._1 ,x._2._2.getOrElse(10L)))
  86 + val rawOut: RDD[(String, String, Long, Long)] = joinedFollowInfo.fullOuterJoin(debut_info).map(x=>{
  87 + (x._1,(x._2._1.getOrElse(("",10L)) ,x._2._2.getOrElse(10L)))
85 88 }).map(x=>(x._1,x._2._1._1, x._2._1._2, x._2._2))
86 89 rawOut.map(x=>(x._1,x._2, math.min(20000,x._3),x._4))
87 90 }
88 91  
... ... @@ -99,11 +102,16 @@
99 102 }
100 103  
101 104 def main(args:Array[String]) {
  105 + val sc = new SparkContext(getSparkConf1)
  106 + val sqlContext = SQLContext.getOrCreate(sc)
  107 + val hadoopConf = sc.hadoopConfiguration
  108 + val fs = org.apache.hadoop.fs.FileSystem.get(hadoopConf)
  109 +
102 110 val nowTS: Long = System.currentTimeMillis
103 111 val dateKey = getDateKey(nowTS)
104 112  
105 113 //에디터 인기 점수 구하기
106   - val fromDB = editorDB(sqlContext, dateKey)
  114 + val fromDB = editorDB(sqlContext, fs, dateKey)
107 115  
108 116 val ePopularity: RDD[((String, String), Double)] = make_0to1_2Key(popularity(fromDB, nowTS).map(x=>(x._1,1-MatrixFunctions.tanh(math.max(1,x._2))*(-1)-1)))
109 117