Commit d00b05c6f35fdc3e116683546c34acf98057b6ce ("editor score range")
Parent: 5ee0858f89
Exists in: master
Showing 1 changed file with 20 additions and 12 deletions
app/com/piki_ds/ver1/EditorScore.scala
@@ -21,7 +21,7 @@
 
 object EditorScore {
 
-  def getSparkConf= {
+  def getSparkConf1= {
     val conf = new SparkConf().setAppName("EditorScore")
     conf.setMaster("local[3]")
     conf.set("master", "local[3]")
 
@@ -29,17 +29,20 @@
     conf.set("spark.driver.allowMultipleContexts", "true")
   }
 
-  val sc = new SparkContext(getSparkConf)
-  val sqlContext = SQLContext.getOrCreate(sc)
-  val hadoopConf = sc.hadoopConfiguration
-  val fs = org.apache.hadoop.fs.FileSystem.get(hadoopConf)
+  def getSparkConf2= {
+    val conf = new SparkConf().setAppName("EditorScore")
+    conf.setMaster("local[3]")
+    conf.set("master", "local[3]")
+    conf.set("spark.app.name", "EditorScore")
+    conf.set("spark.driver.allowMultipleContexts", "t")
+  }
 
   def recentlyUpdatedPath(path:String , isParted:Boolean = true, hdfs:FileSystem): FileStatus = {
     val list = hdfs.listStatus(new Path(path))
     list.filter(x=>x.isDirectory && (!isParted || (isParted && hdfs.exists(x.getPath.suffix("/_SUCCESS"))))).maxBy(x=>x.getModificationTime)
   }
 
-  def followGetter(sQLContext: SQLContext, dateKey:String, fileSave:Boolean = true) = {
+  def followGetter(sQLContext: SQLContext, dateKey:String, fs:FileSystem, fileSave:Boolean = true) = {
     val fromExisting = sQLContext.read.format("json").load(recentlyUpdatedPath("/preprocess/followInfo",false,fs).getPath.toString)
     val fromUpdated = getDashTable(sQLContext, "EDITOR_FOLLOW", dateKey)
     val unionFrom = fromExisting.unionAll(fromUpdated)
@@ -58,7 +61,7 @@
     follow_info
   }
 
-  def editorDB(sQLContext: SQLContext, dateKey:String): RDD[(String, String, Long, Long)] = {
+  def editorDB(sQLContext: SQLContext, fs:FileSystem, dateKey:String): RDD[(String, String, Long, Long)] = {
     // Parse the USER table from the DB and pull out the editors (not an essential step....-_-)
     val levels = Array("ADMIN_O","EDITOR_O","PARTNER_O", "PRESS_O","STAFF_O")
     val filterS_user = s"where level in (${levels.map(x=>"'" + x+ "'").mkString(",")})"
@@ -67,9 +70,9 @@
     val user_info: RDD[(String, String)] = user_tableGet.map(x=>(x.getAs[Long]("uid").toString, x.getAs[String]("name")))
 
     // Parse the FOLLOW table from the DB to get follower counts
-    val follow_info: RDD[(String, Long)] = followGetter(sQLContext, dateKey)
+    val follow_info: RDD[(String, Long)] = followGetter(sQLContext, dateKey,fs)
 
-    val joinedFollowInfo: RDD[(String, (String, Long))] = user_info.leftOuterJoin(follow_info).map(x=>(x._1,(x._2._1,x._2._2.getOrElse(10L))))
+    val joinedFollowInfo: RDD[(String, (String, Long))] = user_info.fullOuterJoin(follow_info).map(x=>(x._1,(x._2._1.getOrElse(""),x._2._2.getOrElse(10L))))
 
     // Parse the MG_CONTENTS table from the DB to get each editor's debut date
     val filterS_mgcont = " where uid is not null group by uid"
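The switch from leftOuterJoin to fullOuterJoin changes both the row set and the value types: leftOuterJoin keeps only keys from the left RDD and wraps only the right side in Option, while fullOuterJoin keeps keys from either side and makes both sides optional, which is why the name now also needs .getOrElse(""). A self-contained sketch with invented uids and counts:

import org.apache.spark.{SparkConf, SparkContext}

object JoinSemanticsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("join-sketch").setMaster("local[2]"))
    val user_info   = sc.parallelize(Seq(("1", "alice"), ("2", "bob")))
    val follow_info = sc.parallelize(Seq(("2", 42L), ("3", 7L)))

    // leftOuterJoin: RDD[(K, (V, Option[W]))] -- uid "3" (has followers but
    // no USER row) is silently dropped; only the right side needs a default.
    val left = user_info.leftOuterJoin(follow_info)
      .map(x => (x._1, (x._2._1, x._2._2.getOrElse(10L))))

    // fullOuterJoin: RDD[(K, (Option[V], Option[W]))] -- uid "3" is kept, and
    // both sides need getOrElse defaults, as in the new joinedFollowInfo line.
    val full = user_info.fullOuterJoin(follow_info)
      .map(x => (x._1, (x._2._1.getOrElse(""), x._2._2.getOrElse(10L))))

    println(left.collect().toSeq) // contains ("1",("alice",10)) and ("2",("bob",42))
    println(full.collect().toSeq) // additionally contains ("3",("",7))
    sc.stop()
  }
}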
@@ -80,8 +83,8 @@
     val debut_info: RDD[(String, Long)] = mgcont_table.map(x=>(x.getAs[Long]("uid").toString, x.getAs[Long]("unixtimestamp(min(udate),yyyy-MM-dd HH:mm:ss)")))
 
     // uid, name, follow, debut
-    val rawOut: RDD[(String, String, Long, Long)] = joinedFollowInfo.leftOuterJoin(debut_info).map(x=>{
-      (x._1,(x._2._1 ,x._2._2.getOrElse(10L)))
+    val rawOut: RDD[(String, String, Long, Long)] = joinedFollowInfo.fullOuterJoin(debut_info).map(x=>{
+      (x._1,(x._2._1.getOrElse(("",10L)) ,x._2._2.getOrElse(10L)))
     }).map(x=>(x._1,x._2._1._1, x._2._1._2, x._2._2))
     rawOut.map(x=>(x._1,x._2, math.min(20000,x._3),x._4))
   }
 
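Same change applied to the debut-date join: a uid missing from joinedFollowInfo now survives with ("", 10L) as its (name, follow) pair, and the final map clamps follower counts to 20000, which is presumably the "range" of the commit title. A plain-Scala illustration with invented values:

object RangeClampSketch {
  def main(args: Array[String]): Unit = {
    // A uid present only in debut_info: its (name, follow) side is None.
    val joined: Option[(String, Long)] = None
    val (name, follow) = joined.getOrElse(("", 10L))   // defaults to ("", 10)
    println((name, follow, math.min(20000L, 123456L))) // follower cap -> 20000
  }
}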
@@ -99,11 +102,16 @@
   }
 
   def main(args:Array[String]) {
+    val sc = new SparkContext(getSparkConf1)
+    val sqlContext = SQLContext.getOrCreate(sc)
+    val hadoopConf = sc.hadoopConfiguration
+    val fs = org.apache.hadoop.fs.FileSystem.get(hadoopConf)
+
    val nowTS: Long = System.currentTimeMillis
    val dateKey = getDateKey(nowTS)
 
    // Compute editor popularity scores
-    val fromDB = editorDB(sqlContext, dateKey)
+    val fromDB = editorDB(sqlContext, fs, dateKey)
 
    val ePopularity: RDD[((String, String), Double)] = make_0to1_2Key(popularity(fromDB, nowTS).map(x=>(x._1,1-MatrixFunctions.tanh(math.max(1,x._2))*(-1)-1)))
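With context creation moved into main, the driver now owns the full lifecycle. A hedged sketch of how that lifecycle could be guarded with try/finally, assuming the same object scope (the diff itself shows no stop() call, and the getSparkConf1 below is only a stand-in for the one defined above):

import org.apache.hadoop.fs.FileSystem
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object LifecycleSketch {
  // Stand-in for the getSparkConf1 defined in the diff above.
  def getSparkConf1 = new SparkConf().setAppName("EditorScore").setMaster("local[3]")

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(getSparkConf1)
    try {
      val sqlContext = SQLContext.getOrCreate(sc)
      val fs = FileSystem.get(sc.hadoopConfiguration)
      // ... editorDB(sqlContext, fs, dateKey) and downstream scoring here ...
    } finally {
      sc.stop() // release the context even if the pipeline throws
    }
  }
}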