Commit d8c6c1a5d06a6f53eee60fab21c8f510f7f9e46e

Authored by evan ago
1 parent 856dd1fc5b
Exists in master

arg change and bash.sh add

Showing 2 changed files with 50 additions and 3 deletions Side-by-side Diff

app/com/piki_ds/ver2ggh/gghScore.scala View file @ d8c6c1a
... ... @@ -32,7 +32,7 @@
32 32 val currentTime = new Date()
33 33  
34 34 val day_delm = 24 * 1000 * 60 * 60L
35   - val saveDay = if (args.length >= 1) args(0) else ""
  35 + val saveDay = if (args.length >= 1) args(0) else format.format(currentTime)
36 36 val ind = -6 to 0
37 37 val dateSet = ind.map(x => {
38 38 format.format(format.parse(saveDay).getTime + day_delm * x)
39 39  
... ... @@ -160,11 +160,13 @@
160 160 val gghResult = result2.join(ctr.selectExpr("cid1","ctr as noRankCtr","ctrTime as noRankCtrTime","expTime2 as expTime","expSize2 as expSize","consume","conCount"),
161 161 result2("cid")===ctr("cid1"), "leftouter").drop(ctr("cid1")).drop("uid").drop("totalExpSize2")
162 162  
163   - gghResult.map{x=>
  163 + gghResult.write.mode(SaveMode.Overwrite).parquet(s"hdfs://pikinn/preprocess/ggh/$saveDay")
  164 +
  165 + /*gghResult.map{x=>
164 166 val title = x.getAs[String]("title").replaceAll("\n", " ").replaceAll("\r", " ").replaceAll("\\|", " ").replaceAll("\\,", " ").trim
165 167 val editor = x.getAs[String]("name").replaceAll("\n", " ").replaceAll("\r", " ").replaceAll("\\|", " ").replaceAll("\\,", " ").trim
166 168 s"${x(0)}|${title}|${x(5)}|${editor}|${x(1)}|${x(2)}|${x(3)}|${x(4)}|${x(8)}|${x(9)}|${x(10)}|${x(11)}|${x(12)}|${x(13)}"}.
167   - coalesce(1, shuffle = true).saveAsTextFile(s"hdfs://pikinn/user/evan/Features/table=ggh/dt=${saveDay}")
  169 + coalesce(1, shuffle = true).saveAsTextFile(s"hdfs://pikinn/user/evan/Features/table=ggh/dt=${saveDay}")*/
168 170 }
169 171  
170 172 }
#!/usr/bin/env bash
#
# Submit one Spark job class from the assembled jar and keep a per-run log.
#
# Usage: sh score.sh <scalaClassName> [arg]
#   $1  class name handed to spark-submit via --class
#   $2  optional single argument forwarded to the job as its first program
#       argument (omitted entirely when not given, so the job sees no args)
#
# All spark-submit output is appended to
#   <script dir>/logs/<class>_<arg>_<YYYYMMDD>.log
# After the run, logs of the same class dated DATE_SUB days ago are removed.
#
# Exit status: 1 on wrong usage, otherwise the exit status of spark-submit.
# Written to stay POSIX-sh compatible because the usage text invokes it as
# "sh score.sh"; the only non-POSIX dependency is GNU `date -d`.

BASEDIR=$(dirname "$0")

# Exactly one or two arguments are accepted.
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]
then
    echo "usage : sh score.sh <scalaClassName> [arg]"
    echo "ex) sh score.sh EditorScore"
    # Non-zero so schedulers/wrappers can tell a usage error from success.
    exit 1
fi

# Deliberately NOT named HOME: assigning HOME would replace the user's home
# directory in the environment of every child process (spark-submit included).
APP_HOME=$BASEDIR
LOG_DIR=$APP_HOME/logs
mkdir -p "$LOG_DIR"
TODAY=$(date +"%Y%m%d")
DATE_SUB="7"
# Relative-date parsing ("N days ago") is a GNU date extension.
DAY_TO_DELETE="$(date "+%Y%m%d" -d "$DATE_SUB days ago")"
LOG="${LOG_DIR}/${1}_${2}_${TODAY}.log"

#HADOOP_CONF_DIR=/etc/hadoop/conf

# ${2:+"$2"} expands to nothing when $2 is unset/empty, so the job is not
# handed an empty-string argument, while a value containing spaces stays intact.
/data/spark/bin/spark-submit \
    --class "$1" \
    --master yarn-client \
    --conf "spark.default.parallelism=250" \
    "$BASEDIR/target/scala-2.11/dsquality-assembly-0.1.0-SNAPSHOT.jar" ${2:+"$2"} >> "$LOG" 2>&1
# Remember the job result; the bookkeeping below must not mask a failure.
STATUS=$?
#target/scala-2.11/dsmakingscore-assembly-0.1.0-SNAPSHOT.jar >> $LOG 2>&1
#target/scala-2.11/dsmakingscore_2.11-0.1.0-SNAPSHOT.jar >> $LOG 2>&1
#--jars "lib_managed/jars/mysql/mysql-connector-java/mysql-connector-java-5.1.36.jar" \

# Five marker lines that make the end of a run easy to spot in the log.
for i in 1 2 3 4 5
do
    echo "END END END END END" >> "$LOG"
done

# Log rotation.
# Log names contain the optional job argument (<class>_<arg>_<date>.log), so an
# exact <class>_<date>.log lookup would never find them. Match by class prefix
# and date suffix instead; this also covers logs written without the argument.
echo "IF LOG FILE ${LOG_DIR}/${1}_*${DAY_TO_DELETE}.log EXISTS DELETE" >> "$LOG"
for OLD_LOG in "${LOG_DIR}/${1}_"*"${DAY_TO_DELETE}.log"
do
    # When nothing matches, the pattern is left unexpanded and -f is false.
    if [ -f "$OLD_LOG" ]
    then
        rm -f "$OLD_LOG" >> "$LOG" 2>&1
        echo "$OLD_LOG deleted" >> "$LOG"
    fi
done

exit "$STATUS"