Commit b11b0e38b2e52deda00f67f57557f57046c30975
1 parent
9a37ba2213
Exists in
master
fr 부서진 링크도 수집
Showing 1 changed file with 3 additions and 1 deletion Side-by-side Diff
insight/url.py
View file @
b11b0e3
... | ... | @@ -15,6 +15,8 @@ |
15 | 15 | if (True): |
16 | 16 | for link in links: |
17 | 17 | |
18 | + link = link.replace('\n'," ").replace('\r'," ").replace('\'',"") | |
19 | + | |
18 | 20 | if len(link) < 5: |
19 | 21 | continue |
20 | 22 | |
... | ... | @@ -42,7 +44,7 @@ |
42 | 44 | elif self.isbitly(link): |
43 | 45 | |
44 | 46 | try: |
45 | - link = "http://bit.ly/" + re.compile('[^ \.\,\?\!a-zA-Z0-9\u3131-\u3163\uac00-\ud7a3]+').sub("",link.split("//bit.ly/")[1].split(" ")[0]) | |
47 | + link = "http://bit.ly/" + re.compile('[^./a-zA-Z0-9]+').sub("",link.split("//bit.ly/")[1].split(" ")[0]) | |
46 | 48 | |
47 | 49 | link_meta = link + "+" |
48 | 50 | txt = requests.get(link_meta).text |