xiaojiejie
Created At : 2024-06-06 00:00
Count:441
Views 👀 :
# Crawler for https://xiaojiejie.me: walks the configured category listings and
# stores the images it finds as save_dir/<category>/<album name>/<index>.jpg.
import os
import re
from typing import List, Optional, Union
from urllib.parse import quote

import requests
from lxml import etree

BASE_URL = "https://xiaojiejie.me"

# Seconds to wait on any single HTTP request, so that a stalled connection
# cannot hang the whole crawl.
REQUEST_TIMEOUT = 30

# Compiled once; matches every character outside the range U+4E00..U+9FA5.
_NON_CJK = re.compile(r'[^\u4e00-\u9fa5]')

headers = {
    'User-Agent': 'Apifox/1.0.0 (https://apifox.com)'
}

# Root directory under which downloaded images are written.
save_dir = "/root/info/xiao/img"

# One single-entry dict per category: listing URL -> number of listing pages
# to crawl for that category.
u = [
    {"https://xiaojiejie.me/coser/ucoser": 83},
    {"https://xiaojiejie.me/life/yygirls": 235},
    {"https://xiaojiejie.me/aigirls": 1},
]


def url_quote(url: str) -> str:
    """Percent-encode everything in *url* except the site origin.

    Every occurrence of ``https://xiaojiejie.me`` is removed from *url*, the
    remainder is passed through ``urllib.parse.quote`` (which leaves ``/``
    unescaped), and the origin is prepended again.
    """
    remainder = url.replace(BASE_URL, "")
    return BASE_URL + quote(remainder)


def list_to_str(lst: List[str]) -> str:
    """Concatenate the strings in *lst* with no separator."""
    return ''.join(lst)


def remove_punctuation(text: str) -> str:
    """Return *text* keeping only characters in the range U+4E00..U+9FA5.

    Despite the name, this drops every other character as well (Latin
    letters, digits, whitespace), not just punctuation.
    """
    return _NON_CJK.sub('', text)


def download_single_img(
    url: str,
    classify: Optional[str] = None,
    name: Optional[str] = None,
    index: Union[int, str, None] = None,
) -> None:
    """Fetch the page at *url* and save the matching image(s) found on it.

    Image sources are taken from ``<img>`` elements inside the element with
    id ``image_div``. A source is downloaded only if it contains ``"webp"``
    and does not contain ``"logo"``. The bytes are written unchanged to
    ``save_dir/classify/name/<index>.jpg``.

    The file name depends only on *index*, so if a page holds several
    matching images the last one written wins.

    Args:
        url: Address of the page to scan for images.
        classify: Sub-directory of ``save_dir`` (the category).
        name: Sub-directory of the category (the album name).
        index: Value used as the file stem.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    root = etree.HTML(response.text)
    for img in root.xpath('//div[@id="image_div"]//img/@src'):
        print(img)
        if "webp" not in img or "logo" in img:
            continue
        print("ok:", img)

        # Send the same headers as for the page request.
        res = requests.get(
            url_quote(img), headers=headers, timeout=REQUEST_TIMEOUT
        )
        if not res.ok:
            # Do not store an error response body as if it were an image.
            print(res.status_code, "skip")
            continue

        target_dir = os.path.join(save_dir, classify, name)
        os.makedirs(target_dir, exist_ok=True)
        print(res.status_code, "write")
        with open(os.path.join(target_dir, str(index) + ".jpg"), "wb") as f:
            f.write(res.content)


def _try_download(
    url: str,
    classify: Optional[str],
    name: str,
    index: Union[int, str],
) -> None:
    """Run download_single_img, logging any failure instead of raising."""
    try:
        download_single_img(url, classify=classify, name=name, index=index)
    except Exception as exc:  # best effort: one bad page must not stop the crawl
        print("error:", url, exc)


def get_single_page_content(url: str, classify: Optional[str] = None) -> None:
    """Download every album linked from the listing page at *url*.

    Each ``<li>`` under the element with id ``index_ajax_list`` supplies an
    album link, a page count and a title. The album link itself is fetched as
    page 1; pages 2..N are fetched from ``<album link><page>/``. Failures on
    individual album pages are logged and skipped.

    Args:
        url: Address of the listing page.
        classify: Category name, used as the first-level output directory.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched.
    """
    print(url)
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    root = etree.HTML(r.text)
    for item in root.xpath('//ul[@id="index_ajax_list"]/li'):
        source_url = list_to_str(item.xpath('a/@href'))
        page_number = list_to_str(item.xpath('a/div/span/text()'))
        _name = remove_punctuation(list_to_str(item.xpath('div/a/text()')))

        try:
            last_page = int(page_number)
        except ValueError:
            # Missing or non-numeric page count: still try the first page.
            print("error: bad page count", repr(page_number), source_url)
            last_page = 1

        # Page 1 lives at the album link itself, not at the listing URL.
        _try_download(source_url, classify, _name, "1")

        for _index in range(2, last_page + 1):
            _url = source_url + str(_index) + "/"
            print(_url, classify)
            _try_download(_url, classify, _name, _index)


def main() -> None:
    """Crawl every listing page of every category configured in ``u``."""
    for sub in u:
        for k, v in sub.items():
            print(k, v)
            classify = k.split("/")[-1]
            for i in range(1, v + 1):
                page_url = k + "/page/" + str(i) + "/"
                try:
                    get_single_page_content(page_url, classify=classify)
                except Exception as exc:  # keep going with the next listing page
                    print("error:", page_url, exc)


if __name__ == "__main__":
    main()