Commit 45083b8193cd13444d625d918e10a3c028fee1c4

Authored by steve ago
Exists in master

Merge branch 'master' of http://gitlab.pikicast.com/Noah/dsfacebook

Showing 1 changed file Inline Diff

insight/url.py View file @ 45083b8
# -*- coding: utf-8 -*- 1 1 # -*- coding: utf-8 -*-
2 2
import requests 3 3 import requests
from base62 import Base62 4 4 from base62 import Base62
import json 5 5 import json
import re 6 6 import re
7 7
class Url() : 8 8 class Url() :
9 9
server = "http://10.128.0.20/" 10 10 server = "http://10.128.0.20/"
11 11
def url2dic(self, links):
    """Convert raw link strings into a list of click-statistics records.

    Every usable link yields one dict with the keys ``bitly_url``,
    ``origin_url``, ``bitly_click``, ``piki_cid`` and ``rpiki_click``; all
    values are strings.  Links are classified with ``self.isdeep``,
    ``self.isrpiki`` and ``self.isbitly``.  Links shorter than five
    characters after clean-up, or matching none of the three predicates,
    produce no record.

    For a bit.ly link the info page (the short URL followed by ``+``) is
    fetched and the ``"long_url"`` and ``"user_clicks"`` values are cut out
    of the response text.  When that lookup fails, exactly one all-default
    record is appended for the link.

    NOTE(review): block nesting was reconstructed from a flattened diff
    view; confirm that the bit.ly post-processing belongs inside the
    ``isbitly`` branch and that the empty-result check runs after the loop.

    :param links: iterable of link strings.
    :return: list of record dicts; contains a single all-default record
        when no link produced one, so it is never empty.
    """

    def blank():
        # Build a fresh dict on every call so records never share state.
        return {'bitly_url': "", 'origin_url': "", 'bitly_click': "0",
                'piki_cid': "0", 'rpiki_click': "0"}

    def cut(text, opener, closer, window):
        # Return the text between `opener` and the first `closer` found
        # within `window` characters after it, or None when `opener` is
        # absent (str.find returns -1, which must not be used as an offset).
        start = text.find(opener)
        if start < 0:
            return None
        start += len(opener)
        stop = start + text[start:start + window].find(closer)
        return text[start:stop]

    # Compiled once instead of once per bit.ly link.
    junk = re.compile('[^./a-zA-Z0-9]+')
    data = []

    for link in links:
        link = link.replace('\n', " ").replace('\r', " ").replace('\'', "")

        if len(link) < 5:
            continue

        if self.isdeep(link):
            data.append({
                'bitly_url': "",
                'origin_url': str(link),
                'bitly_click': "0",
                'piki_cid': str(self.Url2Cid(link)),
                'rpiki_click': "0"
            })

        elif self.isrpiki(link):
            print("여기?")
            print(link)
            data.append({
                'bitly_url': "",
                'origin_url': str(link),
                'bitly_click': "0",
                'piki_cid': str(self.Url2Cid(link)),
                'rpiki_click': str(self.rpiki2click(link))
            })

        elif self.isbitly(link):
            try:
                short_id = link.split("//bit.ly/")[1].split(" ")[0]
                link = "http://bit.ly/" + junk.sub("", short_id)
                txt = requests.get(link + "+").text
            except Exception:
                # Best effort: a malformed link or failed request yields one
                # placeholder.  Skip the rest of the branch so no second
                # record is built from missing or stale response data.
                data.append(blank())
                continue

            origin = cut(txt, "\"long_url\": \"", "\"", 500)
            if origin is None:
                # The page did not contain the expected metadata.
                data.append(blank())
                continue

            clicks = cut(txt, "\"user_clicks\": ", ",", 50)
            if clicks is None:
                clicks = "0"

            try:
                piki_url = str(origin).split("cid=")[1].split("&")[0]
            except (IndexError, UnicodeError):
                # No cid parameter, or text that str() cannot encode.
                piki_url = str(0)

            if self.isrpiki(origin):
                rpiki_click = str(self.rpiki2click(origin))
            else:
                rpiki_click = "0"

            data.append({
                'bitly_url': str(link),
                'origin_url': str(origin),
                'bitly_click': str(clicks),
                'piki_cid': str(Base62().decode(piki_url)),
                'rpiki_click': rpiki_click
            })

    if not data:
        data.append(blank())

    return data
94 94
95 95
96 96
def Url2Cid(self, url):
    """Extract the content id from a link.

    For a link accepted by ``self.isrpiki`` the ``cid`` query value is
    decoded with ``Base62().decode``.  For a link accepted by
    ``self.isdeep`` the page is fetched and the text following
    ``http://www.pikicast.com/share/`` up to the next double quote is
    returned.

    :param url: link string to inspect.
    :return: the decoded or extracted id, or the string ``"0"`` when the
        link matches neither kind or when extraction fails.
    """
    try:
        if self.isrpiki(url):
            return Base62().decode(url.split("cid=")[1].split("&")[0])
        if self.isdeep(url):
            page = requests.get(url).text
            return page.split("http://www.pikicast.com/share/")[1].split('"')[0]
    except Exception:
        # Best effort: a missing parameter, bad encoding or network error
        # maps to the "0" sentinel.
        return "0"
    # Neither kind of link: return the same sentinel instead of an implicit
    # None, which callers would turn into the string "None".
    return "0"
106 106
def rpiki2click(self,url): 107 107 def rpiki2click(self,url):
#print url 108 108 #print url
api = self.server + "contents_RPIKI_api/" 109 109 api = self.server + "contents_RPIKI_api/"
110 110
try: 111 111 try:
fr = url.split("fr=")[1].split("&")[0] 112 112 fr = url.split("fr=")[1].split("&")[0]
except: 113 113 except:
fr = "" 114 114 fr = ""
115 115
try: 116 116 try:
cid = url.split("cid=")[1].split("&")[0] 117 117 cid = url.split("cid=")[1].split("&")[0]
m = url.split("m=")[1].split("&")[0] 118 118 m = url.split("m=")[1].split("&")[0]
c = url.split("c=")[1].split("&")[0] 119 119 c = url.split("c=")[1].split("&")[0]
v = url.split("v=")[1].split("&")[0] 120 120 v = url.split("v=")[1].split("&")[0]
t = url.split("t=")[1].split("&")[0] 121 121 t = url.split("t=")[1].split("&")[0]
data = json.loads(requests.get(api + cid + '_' + fr + '_' + m + '_' + c + '_' + v + '_' + t).text) 122 122 data = json.loads(requests.get(api + cid + '_' + fr + '_' + m + '_' + c + '_' + v + '_' + t).text)