Commit cfdd3d396bf63ab144cd174a5a0cdd86baaa55af

Authored by Noah
1 parent 21b1f6af48
Exists in master

본문, 댓글 http://bit.ly/ url, 피키 콘텐츠 매핑 수집

Showing 6 changed files with 113 additions and 37 deletions Side-by-side Diff

insight/antishortener.py View file @ cfdd3d3
# -*- coding: utf-8 -*-

import requests

class crawlShortener() :
    # Resolves bit.ly short links: appending "+" to a bit.ly URL loads
    # bitly's public info page, out of which the long URL and the click
    # count are sliced from the embedded JSON and printed.

    def runCrawler(self, links):
        """Fetch and print, for each bit.ly link, the original long URL
        and its user-click count taken from bitly's "+" info page."""

        for link in links:

            # The "+" suffix requests bitly's public stats page for the link.
            link_meta = link + "+"
            txt = requests.get(link_meta).text

            # Opening/closing markers delimiting the long URL in the page's
            # embedded JSON.
            source_tag_op = "\"long_url\": \""
            source_tag_cl = "\""

            # Opening/closing markers delimiting the click counter.
            clicks_tag_op = "\"user_clicks\": "
            clicks_tag_cl = ","

            # Slice the values out of the raw page text; the 500/50-char
            # windows bound how far past the opening tag we scan.
            source_bgn = txt.find(source_tag_op) + len(source_tag_op)
            source_end = source_bgn + txt[source_bgn:(source_bgn + 500)].find(source_tag_cl)
            clicks_bgn = txt.find(clicks_tag_op) + len(clicks_tag_op)
            clicks_end = clicks_bgn + txt[clicks_bgn:(clicks_bgn + 50)].find(clicks_tag_cl)

            print str(link)
            print str(txt[source_bgn:source_end])
            print str(txt[clicks_bgn:clicks_end])
insight/base62.py View file @ cfdd3d3
  1 +
  2 +
class Base62() :
    """Encode/decode non-negative integers as base-62 strings.

    Digit alphabet is 0-9, A-Z, a-z; a digit's value is its index in
    ``set`` below.
    """

    # Digit alphabet. Kept under its original (builtin-shadowing) name for
    # backward compatibility with existing callers reading ``self.set``.
    set = ['0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']

    def encode (self,i):
        """Return the base-62 string for int *i*; "0" for any i <= 0."""
        if i <= 0:
            return "0"
        digits = []
        while i > 0:
            # divmod + ``//`` semantics: correct under both Python 2 and 3
            # (the original ``i / 62`` would yield a float on Python 3).
            i, rem = divmod(i, 62)
            digits.append(self.set[rem])
        return "".join(reversed(digits))

    def decode (self,s):
        """Return the int value of base-62 string *s*; 0 for empty/falsy input.

        Characters outside the alphabet are treated as digit value 0 at
        their position (matches the original scan-and-skip behaviour).
        """
        # The original guard ``str(s) <= 0`` was dead code on Python 2 and a
        # TypeError on Python 3; a plain falsiness check captures the intent.
        if not s:
            return 0
        value = 0
        for ch in s:
            # Horner's rule; unknown characters contribute 0.
            value = value * 62 + (self.set.index(ch) if ch in self.set else 0)
        return value
insight/crawlShortener.py View file @ cfdd3d3
  1 +# -*- coding: utf-8 -*-
  2 +
  3 +import requests
  4 +from base62 import Base62
  5 +
class CrawlShortener() :
    """Collect stats for bit.ly short links.

    Appending "+" to a bit.ly URL loads bitly's public info page; the long
    URL and click count are sliced out of the JSON embedded in that page,
    and the piki content id is decoded from the long URL's ``cid=`` query
    parameter (base62-encoded).
    """

    # All-empty fallback row, so callers can always index result[0].
    _EMPTY_ROW = {'bit_url' : "", 'origin_url': "", 'bit_click': "0", 'piki_cid' : "0"}

    def runCrawler(self, links):
        """Return a list of stat dicts (bit_url / origin_url / bit_click /
        piki_cid), one per resolvable link in *links*; never empty."""
        data = []

        print(links)

        try:
            for link in links:
                # Skip blanks/placeholders that cannot be a bit.ly URL.
                if len(link) < 5:
                    continue
                data.append(self._collect(link))
        except Exception:
            # Network or parse failure: record one empty row and stop,
            # matching the original best-effort behaviour.
            # NOTE(review): one bad link aborts the remaining links --
            # confirm this is intended. (Was a bare ``except:``, which also
            # swallowed SystemExit/KeyboardInterrupt.)
            data.append(dict(self._EMPTY_ROW))

        if len(data) == 0:
            data.append(dict(self._EMPTY_ROW))

        return data

    def _collect(self, link):
        """Fetch bitly's "+" info page for *link* and build its stat row."""
        # The "+" suffix requests bitly's public stats page for the link.
        txt = requests.get(link + "+").text

        origin_url = self._between(txt, "\"long_url\": \"", "\"", 500)
        bit_click = self._between(txt, "\"user_clicks\": ", ",", 50)

        try:
            piki_url = origin_url.split("cid=")[1].split("&")[0]
        except Exception:
            # No cid= parameter in the long URL: decode("0") yields 0.
            piki_url = str(0)

        return {
            'bit_url' : str(link),
            'origin_url': origin_url,
            'bit_click': bit_click,
            'piki_cid' : str(Base62().decode(piki_url))
        }

    def _between(self, txt, open_tag, close_tag, window):
        """Return the text after *open_tag* up to *close_tag*, scanning at
        most *window* characters past the opening tag (same slice arithmetic
        as the original inline code)."""
        bgn = txt.find(open_tag) + len(open_tag)
        end = bgn + txt[bgn:(bgn + window)].find(close_tag)
        return str(txt[bgn:end])
insight/postinsight.py View file @ cfdd3d3
... ... @@ -3,7 +3,7 @@
3 3  
4 4 import requests
5 5 import json
6   -import util
  6 +from util import Util
7 7  
8 8  
9 9 class PostInsight() :
... ... @@ -63,7 +63,7 @@
63 63  
    def setContentComment(self, data):
        """Extract and cache the first bit.ly URL found in the post's first
        comment; stores "" when there is no comment or no bit.ly link."""
        try:
            # NOTE(review): assumes a Graph-API-style payload where
            # data['comments']['data'][0]['message'] is the first comment's
            # text -- confirm against the caller.
            self.comment = Util().getText2bitly(data['comments']['data'][0]['message'])
            print self.comment
        except:
            # Missing comment structure or no bit.ly URL in the message.
            self.comment = ""
... ... @@ -72,6 +72,6 @@
72 72 def getContentType(self):
73 73 return self.type
74 74  
75   - def getContentComment(self):
    def getContentCommentUrl(self):
        """Return the bit.ly URL cached by setContentComment ("" when none
        was found)."""
        return self.comment
insight/util.py View file @ cfdd3d3
... ... @@ -3,8 +3,8 @@
3 3  
4 4  
5 5 class Util() :
6   - def getText2Url(self,text):
7   - return "http://" + text.split("http://")[1].split(" ")[0]
  6 + def getText2bitly(self,text):
  7 + return "http://bit.ly/" + text.split("http://bit.ly/")[1].split(" ")[0]
8 8  
9 9 #util = Util()
10 10 #print util.getText2Url("실시간 후끈후끈한 사용 후기.jpg지금 달려!!!!!!! ▶http://bit.ly/29MEr2Z")
... ... @@ -6,6 +6,7 @@
6 6 from insight.postinsight import PostInsight
7 7 from insight.datadb import DataDB
8 8 from insight.util import Util
  9 +from insight.crawlShortener import CrawlShortener
9 10 import time
10 11  
11 12  
12 13  
13 14  
14 15  
... ... @@ -33,16 +34,17 @@
33 34 try:
34 35 message = content['message'].replace('\n',"").replace('\r',"").replace('\'',"")
35 36 except:
36   - message = " "
  37 + message = ""
37 38  
38 39 try:
39   - message_url = util.getText2Url(content['message'])
  40 + message_url = util.getText2bitly(content['message'])
40 41 except:
41   - message_url = " "
  42 + message_url = ""
42 43  
43 44  
44 45 insight = PostInsight(token)
45 46 insight.setContentInsight(content['id'])
  47 +
46 48 lists =["post_story_adds_unique",
47 49 "post_story_adds",
48 50 "post_story_adds_by_action_type_unique,comment",
49 51  
50 52  
... ... @@ -97,15 +99,29 @@
97 99 sqlprefix += "`created_time`, "
98 100 sqlprefix += "`loging_time`, "
99 101  
  102 + sqlprefix += "`bit_url`, "
  103 + sqlprefix += "`bit_click`, "
  104 + sqlprefix += "`origin_url`, "
  105 + sqlprefix += "`piki_cid`, "
  106 +
  107 +
100 108 sqlvalues += p_id + ", "
101 109 sqlvalues += c_id + ", "
102 110 sqlvalues += "'" + insight.getContentType() + "', "
103 111 sqlvalues += "'" + message + "', "
104 112 sqlvalues += "'" + message_url + "', "
105   - sqlvalues += "'" + insight.getContentComment() + "', "
  113 + sqlvalues += "'" + insight.getContentCommentUrl() + "', "
106 114 sqlvalues += "'" + created_time + "', "
107 115 sqlvalues += "NOW(), "
108 116  
  117 + bitdata = CrawlShortener().runCrawler([message_url,insight.getContentCommentUrl()])[0]
  118 + print bitdata
  119 +
  120 + sqlvalues += "'" + bitdata['bit_url'] + "', "
  121 + sqlvalues += bitdata['bit_click'] + ", "
  122 + sqlvalues += "'" + bitdata['origin_url'] + "', "
  123 + sqlvalues += bitdata['piki_cid'] + ", "
  124 +
109 125 for list in lists:
110 126 #print list, insight.getContentInsightByKey(list)
111 127 sqlprefix += "`" + list.replace(',','_') + "`, "
112 128  
... ... @@ -115,9 +131,11 @@
115 131 sqlprefix = sqlprefix[:len(sqlprefix)-2] + ")"
116 132 sqlvalues = sqlvalues[:len(sqlvalues)-2] + ");"
117 133  
  134 + print sqlprefix + sqlvalues
  135 +
118 136 datadb.fb_insert(sqlprefix + sqlvalues)
119 137 datadb.fb_insert(sqlreplace + sqlprefix[32:] + sqlvalues)
120   - #print sqlprefix + sqlvalues
  138 +
121 139  
122 140 del(datadb)
123 141 del(insight)