Commit 75bb1e19bdc2b15a284da9f697caa27b0410edf7

Authored by evan ago
1 parent 8488ffd361
Exists in master

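Change save directory to preprocess/timelineScore/content/ggh; restore the fromKey/position filters, add contents-type features to the prediction input, and disable the CTR calculation block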

Showing 3 changed files with 30 additions and 10 deletions Side-by-side Diff

app/com/piki_ds/ver2ggh/expConTime.scala View file @ 75bb1e1
... ... @@ -187,16 +187,14 @@
187 187 // (오픈, 소비) 정보에서 uuid별 로그 시간을 이용해 오픈 다음에 발생하는 같은 컨텐츠의 소비에 대하여 위치 및 오픈 경로 그리고 오픈타임 정보를 매핑
188 188 val openConsumeIdxed = consumeIndexing(base2)
189 189  
190   - //val openConsume = openConsumeIdxed.where("fromKey in ('h','m') and position != -1 and event = 'CONSUME'").
191   - val openConsume = openConsumeIdxed.where("event = 'CONSUME'").
  190 + val openConsume = openConsumeIdxed.where("fromKey in ('h','m') and position != -1 and event = 'CONSUME'").
192 191 groupBy("uuid", "cid", "fromKey", "position").
193 192 agg(expr("sum(consume) as consume"))
194 193  
195 194 // (노출, 오픈) 정보에서 uuid별 컨텐츠 오픈 위치 및 경로에 따른 노출시간 계산
196 195 val expTime = exposureTime(base3)
197 196  
198   - //val exposureInfo = expTime.where("fromKey in ('h','m') and position != -1")
199   - val exposureInfo = expTime
  197 + val exposureInfo = expTime.where("fromKey in ('h','m') and position != -1")
200 198  
201 199 val expCon = exposureInfo.join(openConsume, exposureInfo("uuid") === openConsume("uuid") && exposureInfo("cid") === openConsume("cid") &&
202 200 exposureInfo("fromKey") === openConsume("fromKey") && exposureInfo("position") === openConsume("position"), "leftouter").
app/com/piki_ds/ver2ggh/gghScore.scala View file @ 75bb1e1
... ... @@ -83,11 +83,26 @@
83 83 expr("sum(SNS) as SNS"),expr("sum(PANORAMA) as PANORAMA"),expr("sum(TEXT) as TEXT"),expr("sum(YOUTUBE) as YOUTUBE"),
84 84 expr("sum(INTR) as INTR"),expr("sum(VIDEO) as VIDEO"))
85 85  
86   - val cidCardTypeSize = cidCardType.join(cardSize, cidCardType("cid")===cardSize("cid1"),"leftouter").drop(cardSize("cid1")).drop(cardSize("cardGroup"))
  86 +
  87 + val contentsType = Util.tables("MG_CONTENTS").where("status='ACTV'").select(
  88 + expr("contents_id as cid"),
  89 + expr("case when contents_type = 'ALBUM' then 1 else 0 end as ALBUM"),
  90 + expr("case when contents_type = 'ALBUM.A' then 1 else 0 end as ALBUM_A"),
  91 + expr("case when contents_type = 'CHST' then 1 else 0 end as CHST"),
  92 + expr("case when contents_type = 'CHST.A' then 1 else 0 end as CHST_A"),
  93 + expr("case when contents_type = 'TOON' then 1 else 0 end as TOON"),
  94 + expr("case when contents_type = 'LIVE' then 1 else 0 end as LIVE")
  95 + )
  96 +
  97 + val cidCardTypeSize = cidCardType.join(cardSize, cidCardType("cid")===cardSize("cid1"),"leftouter").drop(cardSize("cid1")).drop(cardSize("cardGroup")).
  98 + join(contentsType, cidCardType("cid")===contentsType("cid")).drop(contentsType("cid"))
  99 +
87 100 val predData = cidCardTypeSize.map { line =>
88 101 LabeledPoint(line.getAs[Long]("cid"), Vectors.dense(line.getAs[Long]("cardSize").toDouble, line.getAs[Long]("LANDING").toDouble,
89 102 line.getAs[Long]("SHOPPING").toDouble, line.getAs[Long]("PHOTO").toDouble, line.getAs[Long]("SNS").toDouble, line.getAs[Long]("PANORAMA").toDouble,
90   - line.getAs[Long]("TEXT").toDouble, line.getAs[Long]("YOUTUBE").toDouble, line.getAs[Long]("INTR").toDouble, line.getAs[Long]("VIDEO").toDouble
  103 + line.getAs[Long]("TEXT").toDouble, line.getAs[Long]("YOUTUBE").toDouble, line.getAs[Long]("INTR").toDouble, line.getAs[Long]("VIDEO").toDouble,
  104 + line.getAs[Int]("ALBUM").toDouble, line.getAs[Int]("ALBUM_A").toDouble, line.getAs[Int]("CHST").toDouble, line.getAs[Int]("CHST_A").toDouble,
  105 + line.getAs[Int]("TOON").toDouble, line.getAs[Int]("LIVE").toDouble
91 106 ))
92 107 }
93 108  
... ... @@ -140,7 +155,11 @@
140 155  
141 156 val gghScaled = gghVer3.withColumn("gghScaled", (column("ggh") - gghMean) / gghStd).selectExpr("*", "1000 / (1 + exp(-gghScaled)) as scaledGgh").drop("gghScaled")
142 157  
143   - //////// CTR and CTR Time 계산
  158 + gghScaled.map{x =>
  159 + s"${x(0)},${x(5)}"
  160 + }.saveAsTextFile(s"hdfs://pikinn/preprocess/timelineScore/content/ggh/$saveDay")
  161 +
  162 + /*//////// CTR and CTR Time 계산
144 163 val ctr = expConsume.groupBy("cid").
145 164 agg(expr("sum(expTime1) as expTime1"), expr("sum(expSize1) as expSize1"), expr("sum(expTime2) as expTime2"),
146 165 expr("sum(expSize2) as expSize2"), expr("sum(consume) as consume"), expr("count(consume) as conCount")).
147 166  
148 167  
... ... @@ -160,13 +179,15 @@
160 179 val gghResult = result2.join(ctr.selectExpr("cid1","ctr as noRankCtr","ctrTime as noRankCtrTime","expTime2 as expTime","expSize2 as expSize","consume","conCount"),
161 180 result2("cid")===ctr("cid1"), "leftouter").drop(ctr("cid1")).drop("uid").drop("totalExpSize2")
162 181  
163   - gghResult.write.mode(SaveMode.Overwrite).parquet(s"hdfs://pikinn/preprocess/ggh/$saveDay")
  182 + //gghResult.write.mode(SaveMode.Overwrite).parquet(s"hdfs://pikinn/preprocess/ggh/$saveDay")
164 183  
165   - /*gghResult.map{x=>
  184 + gghResult.map{x=>
166 185 val title = x.getAs[String]("title").replaceAll("\n", " ").replaceAll("\r", " ").replaceAll("\\|", " ").replaceAll("\\,", " ").trim
167 186 val editor = x.getAs[String]("name").replaceAll("\n", " ").replaceAll("\r", " ").replaceAll("\\|", " ").replaceAll("\\,", " ").trim
168 187 s"${x(0)}|${title}|${x(5)}|${editor}|${x(1)}|${x(2)}|${x(3)}|${x(4)}|${x(8)}|${x(9)}|${x(10)}|${x(11)}|${x(12)}|${x(13)}"}.
169   - coalesce(1, shuffle = true).saveAsTextFile(s"hdfs://pikinn/user/evan/Features/table=ggh/dt=${saveDay}")*/
  188 + coalesce(1, shuffle = true).saveAsTextFile(s"hdfs://pikinn/preprocess/timelineScore/content/ggh/$saveDay")
  189 + */
  190 +
170 191 }
171 192  
172 193 }
app/com/piki_ds/ver2ggh/simContentsModel.scala View file @ 75bb1e1
... ... @@ -124,6 +124,7 @@
124 124 case _: Throwable => {}
125 125 }
126 126 model.save(sc, s"hdfs://pikinn/user/evan/Features/cardTypeConsume/RFModel")
  127 +
127 128 val RFmodel = RandomForestModel.load(sc, s"hdfs://pikinn/user/evan/Features/cardTypeConsume/RFModel")
128 129  
129 130 val allPredictions = parsedData1.collect.map { point =>