Blame view
insight/url.py
4.77 KB
cd3f2b98c
|
1 2 3 4 |
# -*- coding: utf-8 -*- import requests from base62 import Base62 |
9a4ce3987
|
5 |
import json |
24d172161
|
6 |
import re |
cd3f2b98c
|
7 8 9 10 11 |
class Url():
    """Convert raw links into click-statistics rows.

    Three kinds of link are recognised by substring match:

    * "deep" links containing ``//fb.me/``
    * "rpiki" links containing ``//r.pikicast.com/``
    * bit.ly short links containing ``//bit.ly/``

    Every row produced is a dict of strings with the keys ``bitly_url``,
    ``origin_url``, ``bitly_click``, ``piki_cid`` and ``rpiki_click``.
    """

    # Seconds to wait on each HTTP call. Without a timeout ``requests`` can
    # block indefinitely; override on the class or instance if needed.
    REQUEST_TIMEOUT = 10

    # Characters removed from a bit.ly hash before it is requested. Compiled
    # once here instead of once per link. The raw-string form denotes the same
    # pattern as the previous non-raw literal without invalid-escape warnings.
    _BITLY_JUNK = re.compile(
        r'[^ \.\,\?\!a-zA-Z0-9\u3131-\u3163\uac00-\ud7a3]+')

    def url2dic(self, links):
        """Build one statistics row per recognised link.

        Links shorter than five characters and links of an unrecognised kind
        are skipped. A bit.ly link whose info page cannot be fetched yields a
        placeholder row. If nothing at all was produced, a single placeholder
        row is returned so the result is never empty.

        :param links: iterable of link strings.
        :returns: list of row dicts (see the class docstring for the keys).
        """
        rows = []
        for link in links:
            if len(link) < 5:
                continue
            if self.isdeep(link):
                rows.append(self._row(
                    origin_url=link,
                    piki_cid=self.Url2Cid(link)))
            elif self.isrpiki(link):
                rows.append(self._row(
                    origin_url=link,
                    piki_cid=self.Url2Cid(link),
                    rpiki_click=self.rpiki2click(link)))
            elif self.isbitly(link):
                rows.append(self._bitly_row(link))
        if not rows:
            rows.append(self._row())
        return rows

    @staticmethod
    def _row(bitly_url="", origin_url="", bitly_click="0", piki_cid="0",
             rpiki_click="0"):
        """Return a row dict; the defaults form the placeholder row."""
        return {
            'bitly_url': str(bitly_url),
            'origin_url': str(origin_url),
            'bitly_click': str(bitly_click),
            'piki_cid': str(piki_cid),
            'rpiki_click': str(rpiki_click),
        }

    @staticmethod
    def _between(text, open_tag, close_tag, window):
        """Return the text after ``open_tag`` up to ``close_tag``.

        ``close_tag`` is searched only within ``window`` characters after the
        opening marker. Returns ``""`` when either marker is missing, rather
        than slicing from a position derived from ``find() == -1``.
        """
        start = text.find(open_tag)
        if start < 0:
            return ""
        start += len(open_tag)
        length = text[start:start + window].find(close_tag)
        if length < 0:
            return ""
        return text[start:start + length]

    def _bitly_row(self, link):
        """Resolve a bit.ly link through its ``+`` info page into a row."""
        try:
            short_hash = link.split("//bit.ly/")[1].split(" ")[0]
            short_url = "http://bit.ly/" + self._BITLY_JUNK.sub("", short_hash)
            page = requests.get(short_url + "+",
                                timeout=self.REQUEST_TIMEOUT).text
        except Exception:
            # Stop here: falling through would read variables that were never
            # assigned (or are left over from a previous link).
            return self._row()

        origin_url = self._between(page, "\"long_url\": \"", "\"", 500)
        clicks = self._between(page, "\"user_clicks\": ", ",", 50) or "0"

        try:
            encoded_cid = origin_url.split("cid=")[1].split("&")[0]
        except IndexError:
            encoded_cid = str(0)

        if self.isrpiki(origin_url):
            rpiki_click = self.rpiki2click(origin_url)
        else:
            rpiki_click = "0"

        return self._row(
            bitly_url=short_url,
            origin_url=origin_url,
            bitly_click=clicks,
            piki_cid=Base62().decode(encoded_cid),
            rpiki_click=rpiki_click)

    def Url2Cid(self, url):
        """Return the content id referenced by ``url``.

        * rpiki link: the Base62-decoded ``cid`` query value, or ``"0"`` when
          the link carries no ``cid=`` parameter.
        * deep link: the page is fetched and the text following
          ``http://www.pikicast.com/share/`` (up to the next ``"``) is
          returned, or ``"0"`` on any failure.
        * anything else: ``None``.
        """
        if self.isrpiki(url):
            if "cid=" not in url:
                return "0"
            return Base62().decode(url.split("cid=")[1].split("&")[0])
        if self.isdeep(url):
            try:
                page = requests.get(url, timeout=self.REQUEST_TIMEOUT).text
                return page.split("http://www.pikicast.com/share/")[1].split('"')[0]
            except Exception:
                return "0"
        return None

    def rpiki2click(self, url):
        """Look up the click count of an rpiki link.

        The ``cid``, ``fr``, ``m``, ``c``, ``v`` and ``t`` values are taken
        from ``url``, joined with ``_`` and appended to the API endpoint; the
        response's ``data.real`` field is returned. Returns ``"0"`` when a
        value is missing, the request or JSON decoding fails, or the field is
        an empty string.
        """
        api = "http://data2.piki.work/contents_RPIKI_api/"
        try:
            # NOTE(review): values are located by plain substring search
            # ("m=", "c=", ...), so a parameter whose name merely ends with
            # one of these would match first - confirm against real URLs.
            parts = [url.split(key + "=")[1].split("&")[0]
                     for key in ("cid", "fr", "m", "c", "v", "t")]
            response = requests.get(api + "_".join(parts),
                                    timeout=self.REQUEST_TIMEOUT)
            ret = json.loads(response.text)['data']['real']
            if ret == "":
                ret = "0"
        except Exception:
            ret = "0"
        return ret

    def getText2bitly(self, text):
        """Return the first ``http://bit.ly/...`` link found in ``text``.

        The link ends at the first space. Raises ``IndexError`` when ``text``
        contains no ``http://bit.ly/`` link.
        """
        return "http://bit.ly/" + text.split("http://bit.ly/")[1].split(" ")[0]

    def isdeep(self, url):
        """Return True if ``url`` contains ``//fb.me/`` (at any position)."""
        return "//fb.me/" in url

    def isrpiki(self, url):
        """Return True if ``url`` contains ``//r.pikicast.com/``."""
        return "//r.pikicast.com/" in url

    def isbitly(self, url):
        """Return True if ``url`` contains ``//bit.ly/`` (at any position)."""
        return "//bit.ly/" in url
9a4ce3987
|
137 |