Commit 2a89435c7bff276775babe82fa71732a43aada2b
Exists in
master
Merge branch 'master' of http://gitlab.pikicast.com/Noah/dsfacebook
Showing 2 changed files Inline Diff
insight/url.py
View file @
2a89435
# -*- coding: utf-8 -*- | 1 | 1 | # -*- coding: utf-8 -*- | |
2 | 2 | |||
import requests | 3 | 3 | import requests | |
from base62 import Base62 | 4 | 4 | from base62 import Base62 | |
import json | 5 | 5 | import json | |
import re | 6 | 6 | import re | |
7 | 7 | |||
class Url() : | 8 | 8 | class Url() : | |
9 | 9 | |||
def url2dic(self, links, timeout=10):
    """Build one URL/click record per recognised link.

    Each record is a dict of strings with the keys ``bitly_url``,
    ``origin_url``, ``bitly_click``, ``piki_cid`` and ``rpiki_click``.

    Links are classified with ``self.isdeep``, ``self.isrpiki`` and
    ``self.isbitly`` (in that order).  For bit.ly links the page at
    ``<link>+`` is fetched and the ``"long_url"`` and ``"user_clicks"``
    values are scraped out of the response text.

    :param links: iterable of link strings; ``None``/empty entries and
        entries shorter than 5 characters are skipped.
    :param timeout: seconds to wait for the bit.ly request before the
        lookup is treated as failed.
    :return: list of record dicts; never empty -- when nothing could be
        extracted a single all-default record is returned.
    """

    def record(bitly_url="", origin_url="", bitly_click="0",
               piki_cid="0", rpiki_click="0"):
        # Single place that defines the record shape and its defaults.
        return {
            'bitly_url': bitly_url,
            'origin_url': origin_url,
            'bitly_click': bitly_click,
            'piki_cid': piki_cid,
            'rpiki_click': rpiki_click,
        }

    def between(text, opener, closer, window):
        # Text found after `opener` and before `closer`; `closer` is only
        # searched for within `window` characters.  None when either
        # marker is missing (str.find returns -1 in that case).
        start = text.find(opener)
        if start < 0:
            return None
        start += len(opener)
        length = text[start:start + window].find(closer)
        if length < 0:
            return None
        return text[start:start + length]

    data = []

    for link in links:
        # Skip missing links as well as ones too short to be a URL.
        if not link or len(link) < 5:
            continue

        if self.isdeep(link):
            data.append(record(
                origin_url=str(link),
                piki_cid=str(self.Url2Cid(link)),
            ))

        elif self.isrpiki(link):
            data.append(record(
                origin_url=str(link),
                piki_cid=str(self.Url2Cid(link)),
                rpiki_click=str(self.rpiki2click(link)),
            ))

        elif self.isbitly(link):
            try:
                # Pattern is kept exactly as it was; it strips unwanted
                # characters from the part of the link after "//bit.ly/".
                short_code = re.compile('[^ \.\,\?\!a-zA-Z0-9\u3131-\u3163\uac00-\ud7a3]+').sub("", link.split("//bit.ly/")[1].split(" ")[0])
                link = "http://bit.ly/" + short_code
                txt = requests.get(link + "+", timeout=timeout).text
            except Exception:
                # Best effort: a failed lookup yields a default record.
                # Move on to the next link instead of falling through and
                # reading variables that were never assigned for this link.
                data.append(record())
                continue

            origin_url = between(txt, "\"long_url\": \"", "\"", 500)
            if origin_url is None:
                # No long URL in the response: treat as a failed lookup.
                data.append(record())
                continue

            clicks = between(txt, "\"user_clicks\": ", ",", 50)

            try:
                piki_code = str(origin_url).split("cid=")[1].split("&")[0]
            except Exception:
                # No cid= parameter (or the URL could not be converted).
                piki_code = str(0)

            is_rpiki = self.isrpiki(origin_url)
            piki_cid = str(Base62().decode(piki_code))
            if is_rpiki:
                rpiki_click = str(self.rpiki2click(origin_url))
            else:
                rpiki_click = "0"

            data.append(record(
                bitly_url=str(link),
                origin_url=str(origin_url),
                bitly_click=str(clicks) if clicks is not None else "0",
                piki_cid=piki_cid,
                rpiki_click=rpiki_click,
            ))

    if not data:
        # Callers index the result, so always hand back at least one record.
        data.append(record())

    return data
89 | 89 | |||
90 | 90 | |||
91 | 91 | |||
def Url2Cid(self, url, timeout=10):
    """Extract the content id referenced by ``url``.

    * When ``self.isrpiki(url)`` is true, the ``cid=`` query value is
      decoded with ``Base62().decode`` and the decoded value is returned.
    * When ``self.isdeep(url)`` is true, the page is fetched and the text
      following ``http://www.pikicast.com/share/`` (up to the next double
      quote) is returned.

    :param url: link to inspect.
    :param timeout: seconds to wait for the deep-link request.
    :return: the id, ``"0"`` when it cannot be extracted, or ``None``
        when the URL matches neither kind.
    """
    if self.isrpiki(url):
        try:
            code = url.split("cid=")[1].split("&")[0]
        except IndexError:
            # No cid= parameter: use the same fallback as the deep-link
            # branch instead of raising.
            return "0"
        return Base62().decode(code)

    if self.isdeep(url):
        try:
            page = requests.get(url, timeout=timeout).text
            return page.split("http://www.pikicast.com/share/")[1].split('"')[0]
        except Exception:
            # Best effort: network errors or a page without a share link.
            return "0"

    # Neither kind of URL: keep returning None as before.
    return None
101 | 101 | |||
def rpiki2click(self,url): | 102 | 102 | def rpiki2click(self,url): | |
#print url | 103 | 103 | #print url | |
api = "http://contents-data.pikicast.com/contents_RPIKI_api/" | 104 | 104 | api = "http://data2.piki.work/contents_RPIKI_api/" | |
105 | 105 | |||
try: | 106 | 106 | try: | |
cid = url.split("cid=")[1].split("&")[0] | 107 | 107 | cid = url.split("cid=")[1].split("&")[0] | |
fr = url.split("fr=")[1].split("&")[0] | 108 | 108 | fr = url.split("fr=")[1].split("&")[0] | |
m = url.split("m=")[1].split("&")[0] | 109 | 109 | m = url.split("m=")[1].split("&")[0] | |
c = url.split("c=")[1].split("&")[0] | 110 | 110 | c = url.split("c=")[1].split("&")[0] | |
v = url.split("v=")[1].split("&")[0] | 111 | 111 | v = url.split("v=")[1].split("&")[0] |
runmain.py
View file @
2a89435
#!/usr/bin/env python | 1 | 1 | #!/usr/bin/env python | |
# -*- coding: utf-8 -*- | 2 | 2 | # -*- coding: utf-8 -*- | |
3 | 3 | |||
from insight.context import Context | 4 | 4 | from insight.context import Context | |
from insight.token import Token | 5 | 5 | from insight.token import Token | |
from insight.postinsight import PostInsight | 6 | 6 | from insight.postinsight import PostInsight | |
from insight.datadb import DataDB | 7 | 7 | from insight.datadb import DataDB | |
8 | 8 | |||
from insight.url import Url | 9 | 9 | from insight.url import Url | |
import time | 10 | 10 | import time | |
11 | 11 | |||
12 | 12 | |||
if __name__=='__main__': | 13 | 13 | if __name__=='__main__': | |
14 | 14 | |||
#token_str = "EAAUTLd5JgaoBAJMeeMXqcdExQ1egUHeBaIgVBCilmiH4K9RNyUt7gSgVZCZAtszWCLEaZCDQpxewhICtFjNRICFPWAqUygshcSsdEZBUeZAyUJkON7bfQ2NFFI5AqifNahzjFT83GkWZCZCZBXO3050XSjFf9HSR0iAZD" | 15 | 15 | #token_str = "EAAUTLd5JgaoBAJMeeMXqcdExQ1egUHeBaIgVBCilmiH4K9RNyUt7gSgVZCZAtszWCLEaZCDQpxewhICtFjNRICFPWAqUygshcSsdEZBUeZAyUJkON7bfQ2NFFI5AqifNahzjFT83GkWZCZCZBXO3050XSjFf9HSR0iAZD" | |
token = Token() | 16 | 16 | token = Token() | |
17 | 17 | |||
contexts = Context(token) | 18 | 18 | contexts = Context(token) | |
contexts.setContextsFeedAndTimestemp(3600 * 24 * 10 ) | 19 | 19 | contexts.setContextsFeedAndTimestemp(3600 * 24 * 10) | |
contents_list = contexts.getContentsList() | 20 | 20 | contents_list = contexts.getContentsList() | |
21 | 21 | |||
for content in contents_list: | 22 | 22 | for content in contents_list: | |
datadb = DataDB() | 23 | 23 | datadb = DataDB() | |
time.sleep(1) | 24 | 24 | time.sleep(1) | |
p_id = str(content['id'].split('_')[0]) | 25 | 25 | p_id = str(content['id'].split('_')[0]) | |
c_id = str(content['id'].split('_')[1]) | 26 | 26 | c_id = str(content['id'].split('_')[1]) | |
created_time = content['created_time'].replace('T'," ").replace('+0000',"") | 27 | 27 | created_time = content['created_time'].replace('T'," ").replace('+0000',"") | |
28 | 28 | |||
try: | 29 | 29 | try: | |
message = content['message'].replace('\n'," ").replace('\r'," ").replace('\'',"") | 30 | 30 | message = content['message'].replace('\n'," ").replace('\r'," ").replace('\'',"") | |
except: | 31 | 31 | except: | |
message = "" | 32 | 32 | message = "" | |
33 | 33 | |||
try: | 34 | 34 | try: | |
message_url = Url().getText2bitly(message) | 35 | 35 | message_url = Url().getText2bitly(message) | |
except: | 36 | 36 | except: | |
message_url = "" | 37 | 37 | message_url = "" | |
38 | 38 | |||
insight = PostInsight(token) | 39 | 39 | insight = PostInsight(token) | |
insight.setContentInsight(content['id']) | 40 | 40 | insight.setContentInsight(content['id']) | |
41 | 41 | |||
lists =["post_story_adds_unique", | 42 | 42 | lists =["post_story_adds_unique", | |
"post_story_adds", | 43 | 43 | "post_story_adds", | |
"post_story_adds_by_action_type_unique,comment", | 44 | 44 | "post_story_adds_by_action_type_unique,comment", | |
"post_story_adds_by_action_type_unique,like", | 45 | 45 | "post_story_adds_by_action_type_unique,like", | |
"post_story_adds_by_action_type_unique,share", | 46 | 46 | "post_story_adds_by_action_type_unique,share", | |
"post_story_adds_by_action_type,comment", | 47 | 47 | "post_story_adds_by_action_type,comment", | |
"post_impressions", | 48 | 48 | "post_impressions", | |
"post_impressions_paid_unique", | 49 | 49 | "post_impressions_paid_unique", | |
"post_impressions_paid", | 50 | 50 | "post_impressions_paid", | |
"post_story_adds_by_action_type,like", | 51 | 51 | "post_story_adds_by_action_type,like", | |
"post_story_adds_by_action_type,share", | 52 | 52 | "post_story_adds_by_action_type,share", | |
"post_impressions_unique", | 53 | 53 | "post_impressions_unique", | |
"post_impressions_organic_unique", | 54 | 54 | "post_impressions_organic_unique", | |
"post_impressions_organic", | 55 | 55 | "post_impressions_organic", | |
"post_impressions_by_story_type_unique,other", | 56 | 56 | "post_impressions_by_story_type_unique,other", | |
"post_impressions_by_story_type,other", | 57 | 57 | "post_impressions_by_story_type,other", | |
"post_consumptions_by_type_unique,other clicks", | 58 | 58 | "post_consumptions_by_type_unique,other clicks", | |
"post_consumptions_by_type_unique,photo view", | 59 | 59 | "post_consumptions_by_type_unique,photo view", | |
"post_consumptions_by_type_unique,video play", | 60 | 60 | "post_consumptions_by_type_unique,video play", | |
"post_consumptions_by_type_unique,link clicks", | 61 | 61 | "post_consumptions_by_type_unique,link clicks", | |
"post_consumptions_by_type,other clicks", | 62 | 62 | "post_consumptions_by_type,other clicks", | |
"post_consumptions_by_type,photo view", | 63 | 63 | "post_consumptions_by_type,photo view", | |
"post_consumptions_by_type,video play", | 64 | 64 | "post_consumptions_by_type,video play", | |
"post_consumptions_by_type,link clicks", | 65 | 65 | "post_consumptions_by_type,link clicks", | |
"post_engaged_users", | 66 | 66 | "post_engaged_users", | |
"post_video_views", | 67 | 67 | "post_video_views", | |
"post_video_views_unique", | 68 | 68 | "post_video_views_unique", | |
"post_video_views_paid", | 69 | 69 | "post_video_views_paid", | |
"post_video_views_autoplayed", | 70 | 70 | "post_video_views_autoplayed", | |
"post_video_views_10s", | 71 | 71 | "post_video_views_10s", | |
"post_video_views_10s_unique", | 72 | 72 | "post_video_views_10s_unique", | |
"post_video_views_10s_paid", | 73 | 73 | "post_video_views_10s_paid", | |
"post_video_views_10s_organic", | 74 | 74 | "post_video_views_10s_organic", | |
"post_video_views_10s_clicked_to_play", | 75 | 75 | "post_video_views_10s_clicked_to_play", | |
"post_video_views_10s_autoplayed", | 76 | 76 | "post_video_views_10s_autoplayed", | |
"post_video_views_10s_sound_on", | 77 | 77 | "post_video_views_10s_sound_on", | |
"post_video_views_sound_on", | 78 | 78 | "post_video_views_sound_on", | |
"post_video_view_time", | 79 | 79 | "post_video_view_time", | |
"post_video_complete_views_organic", | 80 | 80 | "post_video_complete_views_organic", | |
"post_video_complete_views_paid"] | 81 | 81 | "post_video_complete_views_paid"] | |
82 | 82 | |||
sqlprefix = "insert into facebook_insights2 (" | 83 | 83 | sqlprefix = "insert into facebook_insights2 (" | |
sqlreplace = "REPLACE into facebook_insights2_last (" | 84 | 84 | sqlreplace = "REPLACE into facebook_insights2_last (" | |
sqlvalues = " values (" | 85 | 85 | sqlvalues = " values (" | |
86 | 86 | |||
sqlprefix += "`p_id`, " | 87 | 87 | sqlprefix += "`p_id`, " | |
sqlprefix += "`c_id`, " | 88 | 88 | sqlprefix += "`c_id`, " | |
sqlprefix += "`type`, " | 89 | 89 | sqlprefix += "`type`, " | |
sqlprefix += "`message`, " | 90 | 90 | sqlprefix += "`message`, " | |
sqlprefix += "`message_url`, " | 91 | 91 | sqlprefix += "`message_url`, " | |
sqlprefix += "`comment_url`, " | 92 | 92 | sqlprefix += "`comment_url`, " | |
sqlprefix += "`created_time`, " | 93 | 93 | sqlprefix += "`created_time`, " | |
sqlprefix += "`loging_time`, " | 94 | 94 | sqlprefix += "`loging_time`, " | |
95 | 95 | |||
sqlprefix += "`bit_url`, " | 96 | 96 | sqlprefix += "`bit_url`, " | |
sqlprefix += "`bit_click`, " | 97 | 97 | sqlprefix += "`bit_click`, " | |
sqlprefix += "`origin_url`, " | 98 | 98 | sqlprefix += "`origin_url`, " | |
sqlprefix += "`piki_cid`, " | 99 | 99 | sqlprefix += "`piki_cid`, " | |
sqlprefix += "`rpiki_click`, " | 100 | 100 | sqlprefix += "`rpiki_click`, " | |
101 | 101 | |||
sqlvalues += p_id + ", " | 102 | 102 | sqlvalues += p_id + ", " | |
sqlvalues += c_id + ", " | 103 | 103 | sqlvalues += c_id + ", " | |
sqlvalues += "'" + insight.getContentType() + "', " | 104 | 104 | sqlvalues += "'" + insight.getContentType() + "', " | |
sqlvalues += "'" + message + "', " | 105 | 105 | sqlvalues += "'" + message + "', " | |
sqlvalues += "'" + message_url + "', " | 106 | 106 | sqlvalues += "'" + message_url + "', " | |
sqlvalues += "'" + insight.getContentCommentUrl() + "', " | 107 | 107 | sqlvalues += "'" + insight.getContentCommentUrl() + "', " | |
sqlvalues += "'" + created_time + "', " | 108 | 108 | sqlvalues += "'" + created_time + "', " | |
sqlvalues += "NOW(), " | 109 | 109 | sqlvalues += "NOW(), " | |
110 | 110 | |||
url_data = Url().url2dic([insight.getLinkUrl(),message_url,insight.getContentCommentUrl()])[0] | 111 | 111 | url_data = Url().url2dic([insight.getLinkUrl(),message_url,insight.getContentCommentUrl()])[0] |