python get weather data

I have recently been learning to write web crawlers in Python. Some websites require the request data to be encrypted before it is submitted, so I am recording how I crawled one of them.

These notes are for my own study and archive.

1. Target website

  • The China Air Quality Online Monitoring and Analysis Platform provides PM2.5 and weather data for 367 cities across the country, covering monitoring items such as AQI, PM2.5, PM10, SO2, NO2, O3, CO, temperature, humidity, wind level, wind direction and satellite cloud maps.
  • website link: https://www.aqistudy.cn/html/city_detail.php?v=1.10

2. Parse web pages

After opening the URL, press F12 (or right-click and choose "Inspect") to open the browser's developer tools, then click the query button; the following request will appear:

By searching for the parameter name as a keyword, we find the encrypted parameter and the name under which it is submitted.

Locate key parameters


However, a global search finds only part of the encryption code in this JS file.
That code is exported by the eval function shown in the figure; de-obfuscating it yields the readable JS code.


After many tests, it turned out that the function name and the secret key in the JS file that submits the request change dynamically.
Their source was later found on the home page, as shown in the figure:

After combining two pieces of js code, the complete encryption and decryption code can be obtained, and the encryption function name, the submitted parameter name and the decryption function can be extracted with regular expressions.

3. Specific implementation

"""
===================================
    -*- coding:utf-8 -*-
    Author     :GadyPu
    E_mail     :Gadypy@gmail.com
    Time       :2020/8/18 0010 01 pm:31
    FileName   :go_aqi.py
===================================
"""
import re
import json
import execjs
import requests
import warnings
from lxml import etree
warnings.filterwarnings('ignore')

class GetWeather(object):
    """Fetch weather data from aqistudy.cn, whose API expects encrypted requests.

    The site's own JavaScript is downloaded, combined with the local
    ``gogo.js`` file and executed through ``execjs`` to encrypt the POST
    parameter and to decode the response.

    Typical use is ``GetWeather().run(method, obj, months)``.
    """

    # Seconds to wait for any single HTTP request. Without a timeout
    # ``requests`` can block forever on a stalled connection.
    timeout = 30

    def __init__(self):
        # Page whose second <script> tag points at the dynamic JS file.
        self.url = 'https://www.aqistudy.cn/html/city_detail.php?v=1.10'
        # Site root, prepended to the script path extracted from the page.
        self.api = 'https://www.aqistudy.cn'
        self.headers = {
            'Host': 'www.aqistudy.cn',
            'Origin': 'https://www.aqistudy.cn',
            'Referer': 'https://www.aqistudy.cn/html/city_detail.php?v=1.10',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; ZTE BA520 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/55.0.2883.77 Mobile Safari/537.36',
        }
        # Endpoint the encrypted query is POSTed to.
        self.req_url = 'https://www.aqistudy.cn/apinew/aqistudyapi.php'
        # The attributes below are filled in by init_js_code().
        self.js_code = None        # combined JS source (gogo.js + downloaded script)
        self.js_ctx = None         # compiled execjs context
        self.encrypt_func = None   # name of the JS function that encrypts parameters
        self.encrypt_param = None  # name of the POST field carrying the encrypted data
        self.decode_func = None    # name of the JS function that decodes the response

    def init_js_code(self):
        """Download the site's dynamic JS and build the execjs context.

        Extracts the encryption function name, the POST parameter name and
        the decode function name from the downloaded script with regular
        expressions, then compiles ``gogo.js`` (read from the current
        working directory) together with that script.

        Raises:
            RuntimeError: if any step fails (network error, unexpected page
                layout, missing ``gogo.js``, JS compilation error). The
                original exception is chained as ``__cause__``. No instance
                attribute is modified when this is raised.
        """
        try:
            # NOTE(review): verify=False disables TLS certificate checking.
            # Kept as in the original script; confirm it is still required.
            response = requests.get(url=self.url, headers=self.headers,
                                    verify=False, timeout=self.timeout)
            html = etree.HTML(response.text)
            js_path = html.xpath('/html/body/script[2]/@src')[0]

            # The first two characters of the src are dropped before joining
            # with the site root (presumably a leading ".." - verify against
            # the live page).
            response = requests.get(url=self.api + js_path[2:], headers=self.headers,
                                    verify=False, timeout=self.timeout)
            script = response.text

            # function name to encrypt parameters
            encrypt_func = re.findall(r'var param = (.*)\(.*\)', script)[0]
            # The parameter name of the post submitted data
            encrypt_param = re.search(r'(?<=\{).+(?=\})', script)[0].split(':')[0].strip()
            # decode function (the last match in the script is the one used)
            decode_func = re.findall(r'data = (.*)\(.*\)', script)[-1]

            # Combine the static helper code with the dynamic script.
            with open('gogo.js', 'r', encoding='utf-8') as fp:
                js_code = fp.read() + script
            js_ctx = execjs.compile(js_code)
        except Exception as e:
            # Fail loudly here instead of letting a later call crash on a
            # half-initialised object with an unrelated-looking error.
            raise RuntimeError(f'failed to initialise the JS context: {e!r}') from e

        # Publish the state only once every step has succeeded.
        self.encrypt_func = encrypt_func
        self.encrypt_param = encrypt_param
        self.decode_func = decode_func
        self.js_code = js_code
        self.js_ctx = js_ctx

    def get_weather_data(self, method, obj):
        """POST one encrypted query and return the decoded response.

        Args:
            method: API method name passed to the JS encryption function.
            obj: query dict passed to the JS encryption function.

        Returns:
            Whatever the JS decode function returns for the response body
            (``run`` parses it with ``json.loads``).

        Raises:
            RuntimeError: if ``init_js_code`` has not completed successfully.
        """
        if self.js_ctx is None:
            raise RuntimeError('init_js_code() must succeed before requesting data')
        param = {
            self.encrypt_param: self.js_ctx.call(self.encrypt_func, method, obj)
        }
        print(param)
        response = requests.post(url=self.req_url, headers=self.headers, data=param,
                                 verify=False, timeout=self.timeout)
        return self.js_ctx.call(self.decode_func, response.text)

    @staticmethod
    def _build_query(obj, start, end):
        """Return a copy of ``obj`` with startTime/endTime set to midnight of the given dates."""
        query = dict(obj)  # copy so the caller's dict is left untouched
        query.update({"startTime": f"{start} 00:00:00"})
        query.update({"endTime": f"{end} 00:00:00"})
        return query

    @staticmethod
    def _summarize_temps(rows):
        """Return ``[max_temp, min_temp, rounded_mean]`` for ``rows``, or None if empty.

        Max and min are returned exactly as they appear under the ``'temp'``
        key; the mean is a float rounded to one decimal place.
        """
        if not rows:
            return None

        def as_float(row):
            return float(row['temp'])

        highest = max(rows, key=as_float)['temp']
        lowest = min(rows, key=as_float)['temp']
        total = sum(as_float(row) for row in rows)
        return [highest, lowest, round(total / len(rows), 1)]

    def run(self, method, obj, months: list, only_start='2020-08-01'):
        """Initialise the JS context, then print a temperature summary per period.

        Args:
            method: API method name, e.g. ``"GETCITYWEATHER"``.
            obj: base query dict; it is copied, not modified.
            months: iterable of ``(start_date, end_date)`` string pairs.
            only_start: only the period whose start date equals this value is
                processed. The default keeps the original behaviour (August
                2020 only); pass ``None`` to process every period.

        Raises:
            RuntimeError: if ``init_js_code`` fails.
        """
        self.init_js_code()
        for mon in months:
            start, end = mon[0], mon[1]
            if only_start is not None and start != only_start:
                continue
            query = self._build_query(obj, start, end)
            js_data = self.get_weather_data(method, query)
            rows = json.loads(js_data)['result']['data']['rows']
            summary = self._summarize_temps(rows)
            if summary is None:
                # An empty result used to crash in max() / division by zero.
                print(f'no rows returned for {start} - {end}')
                continue
            print(summary)


if __name__ == '__main__':
    # Script entry point: query Urumqi weather for January-August 2020.
    import calendar

    obj = {"city":"Urumqi","type":"DAY","startTime":"2020-08-01 00:00:00","endTime":"2020-08-14 00:00:00"}
    #'GETCITYWEATHER' to get weather data such as temperature, humidity, wind strength
    #'GETDETAIL' to get pm2.5, co, so2...
    method = "GETCITYWEATHER"

    year = 2020
    # Last day queried for each month. January-July use the real month
    # length (2020 is a leap year, so February has 29 days, not 28);
    # August is cut off at the 18th as in the original script.
    last_days = [calendar.monthrange(year, m)[1] for m in range(1, 8)] + [18]
    # (first day, last day) pairs as 'YYYY-MM-DD' strings, built as a real
    # list rather than a one-shot map iterator.
    months = [
        (f"{year}-{m:02d}-01", f"{year}-{m:02d}-{day:02d}")
        for m, day in enumerate(last_days, start=1)
    ]
    d = GetWeather()
    d.run(method, obj, months)

4. Result screenshot

5. Reference links

https://www.cnblogs.com/bobo-zhang/p/11243138.html

Tags: Python

Posted by chris1 on Sun, 22 May 2022 23:56:59 +0300