Commit b11b0e38b2e52deda00f67f57557f57046c30975

Authored by Noah ago
1 parent 9a37ba2213
Exists in master

fr 부서진 링크도 수집

Showing 1 changed file with 3 additions and 1 deletion Side-by-side Diff

insight/url.py View file @ b11b0e3
... ... @@ -15,6 +15,8 @@
15 15 if (True):
16 16 for link in links:
17 17  
  18 + link = link.replace('\n'," ").replace('\r'," ").replace('\'',"")
  19 +
18 20 if len(link) < 5:
19 21 continue
20 22  
... ... @@ -42,7 +44,7 @@
42 44 elif self.isbitly(link):
43 45  
44 46 try:
45   - link = "http://bit.ly/" + re.compile('[^ \.\,\?\!a-zA-Z0-9\u3131-\u3163\uac00-\ud7a3]+').sub("",link.split("//bit.ly/")[1].split(" ")[0])
  47 + link = "http://bit.ly/" + re.compile('[^./a-zA-Z0-9]+').sub("",link.split("//bit.ly/")[1].split(" ")[0])
46 48  
47 49 link_meta = link + "+"
48 50 txt = requests.get(link_meta).text