Commit 45083b8193cd13444d625d918e10a3c028fee1c4
Exists in
master
Merge branch 'master' of http://gitlab.pikicast.com/Noah/dsfacebook
Showing 1 changed file Inline Diff
insight/url.py
View file @
45083b8
# -*- coding: utf-8 -*- | 1 | 1 | # -*- coding: utf-8 -*- | |
2 | 2 | |||
import requests | 3 | 3 | import requests | |
from base62 import Base62 | 4 | 4 | from base62 import Base62 | |
import json | 5 | 5 | import json | |
import re | 6 | 6 | import re | |
7 | 7 | |||
class Url() : | 8 | 8 | class Url() : | |
9 | 9 | |||
server = "http://10.128.0.20/" | 10 | 10 | server = "http://10.128.0.20/" | |
11 | 11 | |||
def url2dic(self, links):
    """Convert an iterable of link strings into a list of click-record dicts.

    Every record has the same five keys, all with string values:
    ``bitly_url``, ``origin_url``, ``bitly_click``, ``piki_cid`` and
    ``rpiki_click``.

    Handling per link (first matching rule wins):

    * Newlines and carriage returns are replaced by a space and single
      quotes are removed; links shorter than 5 characters are skipped.
    * ``self.isdeep(link)``: the cid comes from ``self.Url2Cid``; both
      click counters are "0".
    * ``self.isrpiki(link)``: as above, but ``rpiki_click`` comes from
      ``self.rpiki2click``.
    * ``self.isbitly(link)``: the bit.ly info page (``<short url>+``) is
      fetched and the ``"long_url"`` / ``"user_clicks"`` values are cut
      out of the response text by substring search.
    * Anything else produces no record.

    If the bit.ly lookup fails, one blank record is appended for that link
    and processing moves on to the next link.

    :param links: iterable of link strings.
    :returns: list of record dicts; never empty (a single blank record is
        returned when no link produced one).
    """

    def blank_record():
        # Build a fresh dict on every call so records never share state.
        return {'bitly_url' : "",'origin_url': "",'bitly_click': "0",'piki_cid' : "0",'rpiki_click' : "0"}

    # Loop invariants: the cleanup pattern for the bit.ly path and the
    # markers that delimit the values scraped from the bit.ly info page.
    not_allowed = re.compile('[^./a-zA-Z0-9]+')
    source_tag_op = "\"long_url\": \""
    source_tag_cl = "\""
    clicks_tag_op = "\"user_clicks\": "
    clicks_tag_cl = ","

    data = []

    for link in links:

        link = link.replace('\n'," ").replace('\r'," ").replace('\'',"")

        if len(link) < 5:
            continue

        if self.isdeep(link):
            data.append({
                'bitly_url' : "",
                'origin_url': str(link),
                'bitly_click': "0",
                'piki_cid' : str(self.Url2Cid(link)),
                'rpiki_click' : "0"
            })
            continue

        if self.isrpiki(link):
            # Debug trace kept from the original; the single-argument
            # parenthesised form prints the same under Python 2 and 3.
            print("여기?")
            print(link)
            data.append({
                'bitly_url' : "",
                'origin_url': str(link),
                'bitly_click': "0",
                'piki_cid' : str(self.Url2Cid(link)),
                'rpiki_click' : str(self.rpiki2click(link))
            })
            continue

        if not self.isbitly(link):
            continue

        try:
            # Normalise to "http://bit.ly/<path>", dropping anything after
            # the first space and every character outside [./a-zA-Z0-9].
            short_path = link.split("//bit.ly/")[1].split(" ")[0]
            link = "http://bit.ly/" + not_allowed.sub("", short_path)

            # Appending "+" requests the bit.ly info page for the link.
            txt = requests.get(link + "+").text

            # NOTE(review): str.find returns -1 when a marker is missing,
            # which yields a meaningless slice rather than an error --
            # confirm whether that case should also count as a failure.
            source_bgn = txt.find(source_tag_op) + len(source_tag_op)
            source_end = source_bgn + txt[source_bgn:(source_bgn + 500)].find(source_tag_cl)
            clicks_bgn = txt.find(clicks_tag_op) + len(clicks_tag_op)
            clicks_end = clicks_bgn + txt[clicks_bgn:(clicks_bgn + 50)].find(clicks_tag_cl)
        except Exception:
            data.append(blank_record())
            # Without this, the code below would read names that are
            # unbound (first bit.ly link) or left over from the previous
            # link, crashing or emitting a bogus extra record.
            continue

        origin_url = txt[source_bgn:source_end]

        try:
            cid_token = str(origin_url).split("cid=")[1].split("&")[0]
        except Exception:
            # No usable "cid=" parameter in the long URL.
            cid_token = str(0)

        is_rpiki = self.isrpiki(origin_url)
        record = {
            'bitly_url' : str(link),
            'origin_url': str(origin_url),
            'bitly_click': str(txt[clicks_bgn:clicks_end]),
            'piki_cid' : str(Base62().decode(cid_token)),
            'rpiki_click' : "0"
        }
        if is_rpiki:
            record['rpiki_click'] = str(self.rpiki2click(origin_url))
        data.append(record)

    if not data:
        data.append(blank_record())

    return data
94 | 94 | |||
95 | 95 | |||
96 | 96 | |||
def Url2Cid(self, url):
    """Extract the content id (cid) that a link points to.

    * ``self.isrpiki(url)``: the value of the ``cid=`` query parameter is
      decoded with ``Base62().decode``.
    * ``self.isdeep(url)``: the page is fetched and the text between
      ``http://www.pikicast.com/share/`` and the next double quote is
      returned.

    :param url: link string to inspect.
    :returns: the decoded/extracted cid, or the string "0" when the link
        matches neither kind or extraction fails for any reason.
    """
    try:
        if self.isrpiki(url):
            return Base62().decode(url.split("cid=")[1].split("&")[0])
        if self.isdeep(url):
            page = requests.get(url).text
            return page.split("http://www.pikicast.com/share/")[1].split('"')[0]
    except Exception:
        # Best effort: a missing parameter, a network error or an
        # undecodable token all map to the "0" sentinel. Exception (not a
        # bare except) lets KeyboardInterrupt / SystemExit propagate.
        return "0"

    # Neither kind of link: return the same sentinel as the error path
    # instead of falling off the end with None.
    return "0"
106 | 106 | |||
def rpiki2click(self,url): | 107 | 107 | def rpiki2click(self,url): | |
#print url | 108 | 108 | #print url | |
api = self.server + "contents_RPIKI_api/" | 109 | 109 | api = self.server + "contents_RPIKI_api/" | |
110 | 110 | |||
try: | 111 | 111 | try: | |
fr = url.split("fr=")[1].split("&")[0] | 112 | 112 | fr = url.split("fr=")[1].split("&")[0] | |
except: | 113 | 113 | except: | |
fr = "" | 114 | 114 | fr = "" | |
115 | 115 | |||
try: | 116 | 116 | try: | |
cid = url.split("cid=")[1].split("&")[0] | 117 | 117 | cid = url.split("cid=")[1].split("&")[0] | |
m = url.split("m=")[1].split("&")[0] | 118 | 118 | m = url.split("m=")[1].split("&")[0] | |
c = url.split("c=")[1].split("&")[0] | 119 | 119 | c = url.split("c=")[1].split("&")[0] | |
v = url.split("v=")[1].split("&")[0] | 120 | 120 | v = url.split("v=")[1].split("&")[0] | |
t = url.split("t=")[1].split("&")[0] | 121 | 121 | t = url.split("t=")[1].split("&")[0] | |
data = json.loads(requests.get(api + cid + '_' + fr + '_' + m + '_' + c + '_' + v + '_' + t).text) | 122 | 122 | data = json.loads(requests.get(api + cid + '_' + fr + '_' + m + '_' + c + '_' + v + '_' + t).text) |