Commit 2a89435c7bff276775babe82fa71732a43aada2b

Authored by steve ago
Exists in master

Merge branch 'master' of http://gitlab.pikicast.com/Noah/dsfacebook

Showing 2 changed files Inline Diff

insight/url.py View file @ 2a89435
# -*- coding: utf-8 -*- 1 1 # -*- coding: utf-8 -*-
2 2
import requests 3 3 import requests
from base62 import Base62 4 4 from base62 import Base62
import json 5 5 import json
import re 6 6 import re
7 7
class Url() : 8 8 class Url() :
9 9
def url2dic(self, links):
    """Build one click/identifier record per recognised link.

    Each entry of ``links`` is classified with ``self.isdeep``,
    ``self.isrpiki`` and ``self.isbitly`` (checked in that order) and turned
    into a dict with the keys ``bitly_url``, ``origin_url``, ``bitly_click``,
    ``piki_cid`` and ``rpiki_click``; every value is a string.  Links shorter
    than five characters, and links matching none of the three predicates,
    contribute nothing.

    For bit.ly links the "<link>+" page is downloaded and the ``long_url``
    and ``user_clicks`` values are cut out of the page text.  If the link
    cannot be normalised or the page cannot be fetched, one blank record is
    appended and processing moves on to the next link.

    :param links: iterable of URL strings.
    :return: non-empty list of record dicts; a single blank record is
        returned when no link produced one.
    """

    def blank_record():
        # A fresh dict per call so records never share state.
        return {
            'bitly_url': "",
            'origin_url': "",
            'bitly_click': "0",
            'piki_cid': "0",
            'rpiki_click': "0",
        }

    # Compiled once here instead of once per bit.ly link.
    junk_chars = re.compile('[^ \.\,\?\!a-zA-Z0-9\u3131-\u3163\uac00-\ud7a3]+')

    # Markers surrounding the values of interest in the bit.ly "+" page.
    source_tag_op = "\"long_url\": \""
    source_tag_cl = "\""
    clicks_tag_op = "\"user_clicks\": "
    clicks_tag_cl = ","

    # Seconds to wait for bit.ly; without a timeout a stalled connection
    # would block the caller indefinitely.
    fetch_timeout = 10

    data = []

    for link in links:
        if len(link) < 5:
            continue

        if self.isdeep(link):
            data.append({
                'bitly_url': "",
                'origin_url': str(link),
                'bitly_click': "0",
                'piki_cid': str(self.Url2Cid(link)),
                'rpiki_click': "0",
            })

        elif self.isrpiki(link):
            data.append({
                'bitly_url': "",
                'origin_url': str(link),
                'bitly_click': "0",
                'piki_cid': str(self.Url2Cid(link)),
                'rpiki_click': str(self.rpiki2click(link)),
            })

        elif self.isbitly(link):
            try:
                short_code = link.split("//bit.ly/")[1].split(" ")[0]
                link = "http://bit.ly/" + junk_chars.sub("", short_code)
                txt = requests.get(link + "+", timeout=fetch_timeout).text
            except Exception:
                # Lookup failed: record a blank entry and skip the link.
                # Previously execution fell through and read ``txt`` and
                # the offsets below while they were undefined (or stale
                # from an earlier link).
                data.append(blank_record())
                continue

            # str.find returns -1 when a marker is absent, in which case
            # the slices below start at an arbitrary offset.
            # NOTE(review): decide whether such pages should yield a
            # blank record instead.
            source_bgn = txt.find(source_tag_op) + len(source_tag_op)
            source_end = source_bgn + txt[source_bgn:(source_bgn + 500)].find(source_tag_cl)
            clicks_bgn = txt.find(clicks_tag_op) + len(clicks_tag_op)
            clicks_end = clicks_bgn + txt[clicks_bgn:(clicks_bgn + 50)].find(clicks_tag_cl)

            origin_url = txt[source_bgn:source_end]

            try:
                piki_url = str(origin_url).split("cid=")[1].split("&")[0]
            except (IndexError, UnicodeError):
                # No "cid=" parameter (or text that str() cannot encode).
                piki_url = str(0)

            # Only rpiki targets have a click count to look up.
            if self.isrpiki(origin_url):
                rpiki_click = str(self.rpiki2click(origin_url))
            else:
                rpiki_click = "0"

            data.append({
                'bitly_url': str(link),
                'origin_url': str(origin_url),
                'bitly_click': str(txt[clicks_bgn:clicks_end]),
                'piki_cid': str(Base62().decode(piki_url)),
                'rpiki_click': rpiki_click,
            })

    if not data:
        data.append(blank_record())

    return data
89 89
90 90
91 91
def Url2Cid(self, url):
    """Resolve a URL to its content id.

    For URLs accepted by ``self.isrpiki`` the ``cid`` query parameter is
    decoded with ``Base62().decode``.  For URLs accepted by ``self.isdeep``
    the page is downloaded and the text following
    ``http://www.pikicast.com/share/`` (up to the next double quote) is
    returned.

    :param url: URL string to resolve.
    :return: the decoded/extracted id, or the string ``"0"`` when the URL
        matches neither kind, carries no ``cid=`` parameter, or the deep
        link page cannot be fetched or parsed.
    """
    if self.isrpiki(url):
        try:
            encoded_cid = url.split("cid=")[1].split("&")[0]
        except IndexError:
            # rpiki URL without a cid parameter: use the same "0" fallback
            # as the deep-link branch instead of raising.
            return "0"
        return Base62().decode(encoded_cid)

    if self.isdeep(url):
        try:
            # The timeout keeps a stalled server from hanging the caller.
            page = requests.get(url, timeout=10).text
            return page.split("http://www.pikicast.com/share/")[1].split('"')[0]
        except Exception:
            return "0"

    # Neither kind of URL: return the failure value rather than None.
    return "0"
101 101
def rpiki2click(self,url): 102 102 def rpiki2click(self,url):
#print url 103 103 #print url
api = "http://contents-data.pikicast.com/contents_RPIKI_api/" 104 104 api = "http://data2.piki.work/contents_RPIKI_api/"
105 105
try: 106 106 try:
cid = url.split("cid=")[1].split("&")[0] 107 107 cid = url.split("cid=")[1].split("&")[0]
fr = url.split("fr=")[1].split("&")[0] 108 108 fr = url.split("fr=")[1].split("&")[0]
m = url.split("m=")[1].split("&")[0] 109 109 m = url.split("m=")[1].split("&")[0]
c = url.split("c=")[1].split("&")[0] 110 110 c = url.split("c=")[1].split("&")[0]
v = url.split("v=")[1].split("&")[0] 111 111 v = url.split("v=")[1].split("&")[0]
#!/usr/bin/env python 1 1 #!/usr/bin/env python
# -*- coding: utf-8 -*- 2 2 # -*- coding: utf-8 -*-
3 3
from insight.context import Context 4 4 from insight.context import Context
from insight.token import Token 5 5 from insight.token import Token
from insight.postinsight import PostInsight 6 6 from insight.postinsight import PostInsight
from insight.datadb import DataDB 7 7 from insight.datadb import DataDB
8 8
from insight.url import Url 9 9 from insight.url import Url
import time 10 10 import time
11 11
12 12
if __name__=='__main__': 13 13 if __name__=='__main__':
14 14
#token_str = "EAAUTLd5JgaoBAJMeeMXqcdExQ1egUHeBaIgVBCilmiH4K9RNyUt7gSgVZCZAtszWCLEaZCDQpxewhICtFjNRICFPWAqUygshcSsdEZBUeZAyUJkON7bfQ2NFFI5AqifNahzjFT83GkWZCZCZBXO3050XSjFf9HSR0iAZD" 15 15 #token_str = "EAAUTLd5JgaoBAJMeeMXqcdExQ1egUHeBaIgVBCilmiH4K9RNyUt7gSgVZCZAtszWCLEaZCDQpxewhICtFjNRICFPWAqUygshcSsdEZBUeZAyUJkON7bfQ2NFFI5AqifNahzjFT83GkWZCZCZBXO3050XSjFf9HSR0iAZD"
token = Token() 16 16 token = Token()
17 17
contexts = Context(token) 18 18 contexts = Context(token)
contexts.setContextsFeedAndTimestemp(3600 * 24 * 10 ) 19 19 contexts.setContextsFeedAndTimestemp(3600 * 24 * 10)
contents_list = contexts.getContentsList() 20 20 contents_list = contexts.getContentsList()
21 21
for content in contents_list: 22 22 for content in contents_list:
datadb = DataDB() 23 23 datadb = DataDB()
time.sleep(1) 24 24 time.sleep(1)
p_id = str(content['id'].split('_')[0]) 25 25 p_id = str(content['id'].split('_')[0])
c_id = str(content['id'].split('_')[1]) 26 26 c_id = str(content['id'].split('_')[1])
created_time = content['created_time'].replace('T'," ").replace('+0000',"") 27 27 created_time = content['created_time'].replace('T'," ").replace('+0000',"")
28 28
try: 29 29 try:
message = content['message'].replace('\n'," ").replace('\r'," ").replace('\'',"") 30 30 message = content['message'].replace('\n'," ").replace('\r'," ").replace('\'',"")
except: 31 31 except:
message = "" 32 32 message = ""
33 33
try: 34 34 try:
message_url = Url().getText2bitly(message) 35 35 message_url = Url().getText2bitly(message)
except: 36 36 except:
message_url = "" 37 37 message_url = ""
38 38
insight = PostInsight(token) 39 39 insight = PostInsight(token)
insight.setContentInsight(content['id']) 40 40 insight.setContentInsight(content['id'])
41 41
lists =["post_story_adds_unique", 42 42 lists =["post_story_adds_unique",
"post_story_adds", 43 43 "post_story_adds",
"post_story_adds_by_action_type_unique,comment", 44 44 "post_story_adds_by_action_type_unique,comment",
"post_story_adds_by_action_type_unique,like", 45 45 "post_story_adds_by_action_type_unique,like",
"post_story_adds_by_action_type_unique,share", 46 46 "post_story_adds_by_action_type_unique,share",
"post_story_adds_by_action_type,comment", 47 47 "post_story_adds_by_action_type,comment",
"post_impressions", 48 48 "post_impressions",
"post_impressions_paid_unique", 49 49 "post_impressions_paid_unique",
"post_impressions_paid", 50 50 "post_impressions_paid",
"post_story_adds_by_action_type,like", 51 51 "post_story_adds_by_action_type,like",
"post_story_adds_by_action_type,share", 52 52 "post_story_adds_by_action_type,share",
"post_impressions_unique", 53 53 "post_impressions_unique",
"post_impressions_organic_unique", 54 54 "post_impressions_organic_unique",
"post_impressions_organic", 55 55 "post_impressions_organic",
"post_impressions_by_story_type_unique,other", 56 56 "post_impressions_by_story_type_unique,other",
"post_impressions_by_story_type,other", 57 57 "post_impressions_by_story_type,other",
"post_consumptions_by_type_unique,other clicks", 58 58 "post_consumptions_by_type_unique,other clicks",
"post_consumptions_by_type_unique,photo view", 59 59 "post_consumptions_by_type_unique,photo view",
"post_consumptions_by_type_unique,video play", 60 60 "post_consumptions_by_type_unique,video play",
"post_consumptions_by_type_unique,link clicks", 61 61 "post_consumptions_by_type_unique,link clicks",
"post_consumptions_by_type,other clicks", 62 62 "post_consumptions_by_type,other clicks",
"post_consumptions_by_type,photo view", 63 63 "post_consumptions_by_type,photo view",
"post_consumptions_by_type,video play", 64 64 "post_consumptions_by_type,video play",
"post_consumptions_by_type,link clicks", 65 65 "post_consumptions_by_type,link clicks",
"post_engaged_users", 66 66 "post_engaged_users",
"post_video_views", 67 67 "post_video_views",
"post_video_views_unique", 68 68 "post_video_views_unique",
"post_video_views_paid", 69 69 "post_video_views_paid",
"post_video_views_autoplayed", 70 70 "post_video_views_autoplayed",
"post_video_views_10s", 71 71 "post_video_views_10s",
"post_video_views_10s_unique", 72 72 "post_video_views_10s_unique",
"post_video_views_10s_paid", 73 73 "post_video_views_10s_paid",
"post_video_views_10s_organic", 74 74 "post_video_views_10s_organic",
"post_video_views_10s_clicked_to_play", 75 75 "post_video_views_10s_clicked_to_play",
"post_video_views_10s_autoplayed", 76 76 "post_video_views_10s_autoplayed",
"post_video_views_10s_sound_on", 77 77 "post_video_views_10s_sound_on",
"post_video_views_sound_on", 78 78 "post_video_views_sound_on",
"post_video_view_time", 79 79 "post_video_view_time",
"post_video_complete_views_organic", 80 80 "post_video_complete_views_organic",
"post_video_complete_views_paid"] 81 81 "post_video_complete_views_paid"]
82 82
sqlprefix = "insert into facebook_insights2 (" 83 83 sqlprefix = "insert into facebook_insights2 ("
sqlreplace = "REPLACE into facebook_insights2_last (" 84 84 sqlreplace = "REPLACE into facebook_insights2_last ("
sqlvalues = " values (" 85 85 sqlvalues = " values ("
86 86
sqlprefix += "`p_id`, " 87 87 sqlprefix += "`p_id`, "
sqlprefix += "`c_id`, " 88 88 sqlprefix += "`c_id`, "
sqlprefix += "`type`, " 89 89 sqlprefix += "`type`, "
sqlprefix += "`message`, " 90 90 sqlprefix += "`message`, "
sqlprefix += "`message_url`, " 91 91 sqlprefix += "`message_url`, "
sqlprefix += "`comment_url`, " 92 92 sqlprefix += "`comment_url`, "
sqlprefix += "`created_time`, " 93 93 sqlprefix += "`created_time`, "
sqlprefix += "`loging_time`, " 94 94 sqlprefix += "`loging_time`, "
95 95
sqlprefix += "`bit_url`, " 96 96 sqlprefix += "`bit_url`, "
sqlprefix += "`bit_click`, " 97 97 sqlprefix += "`bit_click`, "
sqlprefix += "`origin_url`, " 98 98 sqlprefix += "`origin_url`, "
sqlprefix += "`piki_cid`, " 99 99 sqlprefix += "`piki_cid`, "
sqlprefix += "`rpiki_click`, " 100 100 sqlprefix += "`rpiki_click`, "
101 101
sqlvalues += p_id + ", " 102 102 sqlvalues += p_id + ", "
sqlvalues += c_id + ", " 103 103 sqlvalues += c_id + ", "
sqlvalues += "'" + insight.getContentType() + "', " 104 104 sqlvalues += "'" + insight.getContentType() + "', "
sqlvalues += "'" + message + "', " 105 105 sqlvalues += "'" + message + "', "
sqlvalues += "'" + message_url + "', " 106 106 sqlvalues += "'" + message_url + "', "
sqlvalues += "'" + insight.getContentCommentUrl() + "', " 107 107 sqlvalues += "'" + insight.getContentCommentUrl() + "', "
sqlvalues += "'" + created_time + "', " 108 108 sqlvalues += "'" + created_time + "', "
sqlvalues += "NOW(), " 109 109 sqlvalues += "NOW(), "
110 110
url_data = Url().url2dic([insight.getLinkUrl(),message_url,insight.getContentCommentUrl()])[0] 111 111 url_data = Url().url2dic([insight.getLinkUrl(),message_url,insight.getContentCommentUrl()])[0]