Commit 9ba65111 by 白满斌

爬取

parent 1024dc56
<?php
namespace App\Console\Commands;
use App\Models\LeidaModel;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Facades\Log;
use App\Http\Services\BaseService;
class BatchDetail extends Command
{
    /**
     * The name and signature of the console command.
     *
     * Takes two arguments, `id` and `max`; handle() passes them to
     * whereBetween('id', [id, max]) to select the LeidaModel rows to process.
     *
     * @var string
     */
    protected $signature = 'leida:detail
{id : 要爬取的id}
{max : 要爬取的max}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = '爬取数据';

    // File that log() appends to.
    protected $logFile = './paqu.txt';

    // The three model properties below are not referenced inside this class.
    protected $frontUserModel;
    protected $orderModel;
    protected $dealRecordModel;

    // URL suffix per exam type, laid out as: province-city-0-examType-recruitmentNotice
    public static $ksType = [
        // '公务员' => '-0-0-2-124',
        // "事业单位" => "-0-0-3-124",
        // "教师" => "-0-0-59-124",
        "医疗" => "-0-0-60-124",
        "选调" => "-0-0-7-124",
        "遴选" => "-0-0-63-124",
        "选调生" => "-0-0-62-124",
        "三支一扶" => "-0-0-8-124",
        "大学生村官" => "-0-0-9-124",
        "基层工作者" => "-0-0-66-124",
        "银行" => "-0-0-67-124",
        "国企" => "-0-0-78-124",
        "公益性岗位" => "-0-0-80-124",
        "军队文职" => "-0-0-249-124",
    ];

    // Numeric area id => area name.
    public static $ksArea = [
        3510 => "国家",
        1117 => "安徽",
        1 => "北京",
        1255 => "福建",
        2129 => "广东",
        3191 => "甘肃",
        2290 => "广西",
        2723 => "贵州",
        37 => "河北",
        1849 => "湖北",
        705 => "黑龙江",
        1654 => "河南",
        2429 => "海南",
        1979 => "湖南",
        627 => "吉林",
        878 => "江苏",
        1359 => "江西",
        498 => "辽宁",
        374 => "内蒙古",
        3357 => "宁夏",
        3304 => "青海",
        2500 => "四川",
        1482 => "山东",
        859 => "上海",
        232 => "山西",
        3063 => "陕西",
        19 => "天津",
        2980 => "西藏",
        3390 => "新疆",
        2826 => "云南",
        1004 => "浙江",
        2460 => "重庆",
        3508 => "香港",
        3509 => "澳门",
        3507 => "台湾",
    ];

    // Guzzle client, created in handle().
    protected $client;
    // Not referenced inside this class.
    protected $jar;
    protected $baseStoragePath = 'crawled_data';

    /**
     * Execute the console command.
     *
     * Walks the LeidaModel rows whose id lies between the `id` and `max` arguments,
     * requests the article detail endpoint for each row and stores the returned
     * source URL, origin title and raw article JSON back on the row.
     *
     * The run stops at the first failure (bad status, unexpected payload, exception).
     * The failure is written to the application log and to the console before stopping;
     * previously a dd() call ended the process before Log::error() could run.
     *
     * @return int 0 when every row was processed, 1 when the run stopped on a failure
     */
    public function handle()
    {
        $LeidaModel = new LeidaModel();
        $id = $this->argument('id');
        $max = $this->argument('max');

        $this->client = new Client([
            'timeout' => 30,
            // NOTE: TLS certificate verification is off; should be true in production.
            'verify' => false,
            'allow_redirects' => [
                'max' => 5,
                'strict' => true,
                'referer' => true,
                'protocols' => ['http', 'https'],
            ],
        ]);

        dump($id, $max);

        // chunkById() stops and returns false as soon as the callback returns false.
        $completed = $LeidaModel->whereBetween('id', [$id, $max])
            ->select(['*'])
            ->chunkById(50, function ($list) use ($LeidaModel) {
                foreach ($list->toArray() as $value) {
                    $url = '';
                    try {
                        $articleId = self::getLastNumberFromUrl($value['url']);
                        if ($articleId === null) {
                            throw new \RuntimeException('No article id found in url: ' . $value['url']);
                        }

                        // NOTE(review): appid, token, userId and the signing secret are hard-coded
                        // credentials; they should be moved to configuration.
                        $str = 'appid=uqsFgLOVbuPrfn1v&articleId=' . $articleId
                            . '&from_device=h5&timestamp=' . time()
                            . '&token=569fff2952c0317c29f3308df15cd75d11d15fmern1cdeol3f9tcfo31cdnb3o9o5yd03&userId=26239811';
                        $signStr = md5(urlencode($str . '&secret=Hf6yn1JPb1QZxniWhIPv1IrHbWeLh2e8'));
                        $url = 'https://api.gongkaoleida.com/api/FrontBackSecure/article/detailUserPc?' . $str . '&sign=' . $signStr;

                        // Send the request.
                        $response = $this->client->request('GET', $url);

                        // Read the response.
                        $statusCode = $response->getStatusCode();
                        $content = json_decode($response->getBody()->getContents(), true);
                        if ($statusCode != 200) {
                            throw new \RuntimeException('状态码异常:' . $statusCode);
                        }
                        // json_decode() yields null for a non-JSON body, so guard before indexing.
                        if (!is_array($content) || ($content['code'] ?? null) != 1) {
                            throw new \RuntimeException(
                                'Unexpected API response: ' . json_encode($content, JSON_UNESCAPED_UNICODE)
                            );
                        }
                        $sourceData = $content['data']['articleInfo'] ?? [];
                        if (empty($sourceData)) {
                            throw new \RuntimeException('数据异常:' . $value['id']);
                        }

                        $updateData = [
                            'from_url' => $sourceData['sourcePageUrl'] ?? '',
                            'from_title' => $sourceData['origin'] ?? '',
                            'from_detail' => json_encode($sourceData),
                        ];
                        $LeidaModel->updateData(['id' => $value['id']], $updateData);

                        dump('done:' . $value['id']);
                        // Pause between requests.
                        sleep(5);
                    } catch (\Exception $e) {
                        Log::error('Web crawler error: ' . $e->getMessage(), [
                            'id' => $value['id'],
                            'url' => $url,
                            'error' => $e->getMessage(),
                        ]);
                        $this->error($e->getMessage() . ' (id: ' . $value['id'] . ')');

                        // Stop the whole run, as the former dd() did.
                        return false;
                    }
                }

                return true;
            });

        return $completed === false ? 1 : 0;
    }

    /**
     * Extract the numeric last path segment of a URL.
     *
     * Matches a run of digits that directly follows the final "/" and is followed
     * only by an optional "?query" part.
     *
     * @param string $url
     * @return int|null the number, or null when the URL does not end in a numeric segment
     */
    public static function getLastNumberFromUrl($url)
    {
        if (preg_match('/\/(\d+)(?:\?.*)?$/', (string)$url, $matches)) {
            return (int)$matches[1];
        }

        return null;
    }

    /**
     * Append one trimmed line to the file named by $this->logFile.
     *
     * @param string $msg
     */
    protected function log($msg)
    {
        file_put_contents($this->logFile, trim($msg) . "\n", FILE_APPEND);
    }
}
...@@ -2,12 +2,14 @@ ...@@ -2,12 +2,14 @@
namespace App\Console\Commands; namespace App\Console\Commands;
use App\Models\FrontUserModel; use App\Models\LeidaModel;
use App\Models\Order;
use App\Models\DealRecord;
use Illuminate\Console\Command; use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB; use Illuminate\Support\Facades\DB;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Facades\Log;
use App\Http\Services\BaseService;
class BatchExportUserPhone extends Command class BatchExportUserPhone extends Command
{ {
/** /**
...@@ -15,16 +17,16 @@ class BatchExportUserPhone extends Command ...@@ -15,16 +17,16 @@ class BatchExportUserPhone extends Command
* *
* @var string * @var string
*/ */
protected $signature = 'batch:export:user:phone'; protected $signature = 'leida:gongkao {page : 要爬取的page}';
/** /**
* The console command description. * The console command description.
* *
* @var string * @var string
*/ */
protected $description = '导出学员电话'; protected $description = '爬取数据';
protected $logFile = ''; protected $logFile = './paqu.txt';
protected $frontUserModel; protected $frontUserModel;
...@@ -32,64 +34,304 @@ class BatchExportUserPhone extends Command ...@@ -32,64 +34,304 @@ class BatchExportUserPhone extends Command
protected $dealRecordModel; protected $dealRecordModel;
//省-市-0-考试类型-招考公告
public static $ksType = [
// '公务员' => '-0-0-2-124',
"教师" => "-0-0-59-124",
"事业单位" => "-0-0-3-124",
"医疗" => "-0-0-60-124",
"选调" => "-0-0-7-124",
"遴选" => "-0-0-63-124",
"选调生" => "-0-0-62-124",
"三支一扶" => "-0-0-8-124",
"大学生村官" => "-0-0-9-124",
"基层工作者" => "-0-0-66-124",
"银行" => "-0-0-67-124",
"国企" => "-0-0-78-124",
"公益性岗位" => "-0-0-80-124",
"军队文职" => "-0-0-249-124",
];
public static $ksArea = [
// 3510 => "国家",
// 1117 => "安徽",
// 1 => "北京",
// 1255 => "福建",
2129 => "广东",
3191 => "甘肃",
2290 => "广西",
2723 => "贵州",
37 => "河北",
1849 => "湖北",
705 => "黑龙江",
1654 => "河南",
2429 => "海南",
1979 => "湖南",
627 => "吉林",
878 => "江苏",
1359 => "江西",
498 => "辽宁",
374 => "内蒙古",
3357 => "宁夏",
3304 => "青海",
2500 => "四川",
1482 => "山东",
859 => "上海",
232 => "山西",
3063 => "陕西",
19 => "天津",
2980 => "西藏",
3390 => "新疆",
2826 => "云南",
1004 => "浙江",
2460 => "重庆",
3508 => "香港",
3509 => "澳门",
3507 => "台湾",
];
protected $client;
protected $jar;
protected $baseStoragePath = 'crawled_data';
/** /**
* Execute the console command. * Execute the console command.
* *
* @return int * @return int
*/ */
public function handle() { public function handle()
$this->frontUserModel = new FrontUserModel(); {
$this->orderModel = new Order();
$this->dealRecordModel = new DealRecord(); $initpage = $this->argument('page');
$LeidaModel = new LeidaModel();
$this->client = new Client([
'timeout' => 30,
'verify' => false, // 注意:生产环境应设为 true
'allow_redirects' => [
'max' => 5,
'strict' => true,
'referer' => true,
'protocols' => ['http', 'https'],
],
]);
$firstDone = false; // 是否已处理第一个特殊循环
$lianxuNone = 0; // 连续三次是未爬取到就die;
foreach (self::$ksArea as $pid => $areaName) {
$line = 1; foreach (self::$ksType as $typeName => $typeIds) {
$refer = 'https://www.gongkaoleida.com/area/3510-0-0-59-124';
// 确定当前外层循环的起始页
$startPage = $firstDone == true ? 1 : $initpage;
$totalPage = $startPage+1;
for ($page = $startPage; $page < $totalPage; $page++) {
$frontUserWhere = [ //省-市-0-考试类型-招考公告
'deleted_at' => null, $url = 'https://www.gongkaoleida.com/area/' . $pid . $typeIds . '?page=' . $page;
]; try {
$frontUserWhere[] = ['phone', '<>', ''];
//日志文件 // 设置默认请求头
$this->logFile = 'user_phone.txt'; $defaultHeaders = [
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language' => 'zh-CN,zh;q=0.9',
'Accept-Encoding' => 'gzip, deflate, br',
'Connection' => 'keep-alive',
'Sec-Ch-Ua' => '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
'Referer' => $refer,
'Cookie' => "Hm_lvt_f721d958b1ffbdd95625a927f9bbe719=1764753040; HMACCOUNT=D3E8E8384EAB7DAC; Hm_lvt_a85566772a4d8c7093230e45128ffa8f=1764753040; _c_WBKFRo=qpwRYzkl3x5zkB59FmtRBA7hNLLyiTF2HfalK5J5; _nb_ioWEgULi=; vipInfo=%7B%22isVip%22%3A0%2C%22vipGrade%22%3A0%2C%22vipExpire%22%3A0%7D; gkld_captchaSecret=2f246381c807d7085d53e20ef141809c; userId=11317419; token=c1c6a9fdf0bea1fe6a46314e3dd8c17111d156qkkr1cdejzbb1znjle1cdn6hw5gc9gxe; acw_sc__v2=197d84838-b78ec3f20861d54bee402608fc7cc3a5e15a044e3e021f6f64; acw_tc=0b34264217652651756636995ed7562c242228d8abe852a130a3ec737b74dd; Hm_lpvt_a85566772a4d8c7093230e45128ffa8f=1765265176; Hm_lpvt_f721d958b1ffbdd95625a927f9bbe719=1765265176; XSRF-TOKEN=eyJpdiI6Iit0Y3ZzMWFJTnFyZFZQYVU0SU5XQWc9PSIsInZhbHVlIjoiNnNUMWF1MDFvTVpycFpKVHc3cVFuSlYwVTQ1T3RaUVE2MTVsVnVoOVdLaGdlVEpVNUs1WGFNZFk5XC9FSXQ4TkwiLCJtYWMiOiI1NDE3ODQyZjg2YWUyNWEzNjg2ODNmNTJhNDAxMjlmZDMwZDVmYmNmNzBhNjdmNjEwNWY5NzEzZDVhOTAzNDdjIn0%3D; gkld_session=eyJpdiI6IkRtck9JblFURGdcL1RNVU5iRWN4MENBPT0iLCJ2YWx1ZSI6Ikc4T09hMW1QbFpYeEhmQnM0VUlEbkw5d3ZXMTIyZmtEVlFhdlpyT3AzdlwvOElBQkkxM0dFNkhFdnc1aFNnRVZwT2FPa3dUVUx6OGxRdTJGM2ViN2gyb2htVTFZVXhLZHdocjRNbm5wUmdGVXZ1UUdlaU1vVGNwOEpaOHN5SkhSSSIsIm1hYyI6IjczNGQ4ZGU2Y2ExZDdkMTk3ODU0ZGU4N2VjMWRiMGJkMzRkNWQyZDQ2YTc2YzJiMWZiY2RlZWViZGRhODE0ZmMifQ%3D%3D",
];
$frontUserORM = DB::table('yh_front_user')->where($frontUserWhere); $refer = $url;
$frontUserORM->chunkById(100, function($list) use(&$line) { // 发送请求
$userList = $list->toArray(); $response = $this->client->request('GET', $url, [
'headers' => $defaultHeaders,
]);
foreach ($userList as $userInfo) { // 获取响应
$userInfo = (array)$userInfo; // $statusCode = $response->getStatusCode();
$content = $response->getBody()->getContents();
// dd($content, $url);
// Storage::put('1.html', $content);
$ret = BaseService::htmlExplain($url, $content, $areaName);
sleep(7);
// 行号+1并打印显示进度
var_dump($line);
$line++;
if (!checkPhone($userInfo['phone'])) { if($lianxuNone >= 3){
continue; dd('请更新cookie:'.$url);
}
if(empty($ret['allData'])){
$lianxuNone ++;
Storage::put('error_pq.txt', '未爬取到数据'.$url. "\n");
dump('未爬取到数据:'.$url);
continue;
}
$lianxuNone = 0;
if($ret['continue']){
dump('超过时间不爬:'.$url);
break;
}
$totalPage = $ret['total_pages'];
dump('done:'.$url);
} catch (\Exception $e) {
dd($e->getMessage(), $url);
Log::error('Web crawler error: ' . $e->getMessage(), [
'url' => $url,
'error' => $e->getMessage()
]);
}
} }
// 判断当前用户是否下单过 // 标记第一个循环已处理
$orderInfo = $this->orderModel->findDataWithField(['user_id' => $userInfo['id']]); if (!$firstDone) {
if (!empty($orderInfo)) { $firstDone = true;
continue;
} }
// 判断当前电话是否存在交易记录列表里 }
$dealRecordInfo = $this->dealRecordModel->findDataWithField(['class_phone' => $userInfo['phone']]);
if (!empty($orderInfo)) { }
continue; }
public function crawlWithDynamicCookie()
{
$client = new Client();
// 1. 先访问首页获取最新cookie
$homeResponse = $client->get('https://www.gongkaoleida.com');
$cookies = $homeResponse->getHeader('Set-Cookie');
// dd($homeResponse->getHeaders());
// 2. 解析cookie
// $cookieJar = CookieJar::fromArray(
// $this->parseCookies($cookies),
// 'https://www.gongkaoleida.com'
// );
$cookieJar = $this->createCookieJarFromHeaders($cookies, 'https://www.gongkaoleida.com');
// $cookieArray = $this->parseCookies($cookies);
// $cookieJar = new CookieJar(false, $cookieArray);
dd( $cookies, $cookieJar);
// return $cookieJar;
// 3. 使用新cookie访问目标页面
$response = $client->get('https://target-site.com/target-page', [
'cookies' => $cookieJar,
'headers' => [
'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
]
]);
return $response->getBody();
}
/**
* 从响应头创建 CookieJar
*/
private function createCookieJarFromHeaders(array $cookieHeaders, string $domain): CookieJar
{
$cookieArray = [];
foreach ($cookieHeaders as $header) {
// 解析 Set-Cookie 头,例如:sessionId=abc123; Path=/; HttpOnly
if (preg_match('/^([^=]+)=([^;]+)/', $header, $matches)) {
$cookieName = trim($matches[1]);
$cookieValue = trim($matches[2]);
$cookieArray[$cookieName] = $cookieValue;
}
}
// 使用 fromArray 方法创建 CookieJar
return CookieJar::fromArray($cookieArray, $domain);
}
private function parseCookies(array $cookieHeaders): array
{
$cookies = [];
foreach ($cookieHeaders as $header) {
preg_match_all('/([^=]+)=([^;]+)/', $header, $matches);
if (isset($matches[1], $matches[2])) {
foreach ($matches[1] as $index => $name) {
$cookies[trim($name)] = trim($matches[2][$index]);
} }
}
}
return $cookies;
}
$this->log($userInfo['phone']);
/**
* 保存网页内容到storage
*/
public function saveToStorage($content, $filename, $subdirectory = null)
{
try {
// 构建完整路径
$path = $this->baseStoragePath;
if ($subdirectory) {
$path .= '/' . trim($subdirectory, '/');
} }
// 确保目录存在
if (!Storage::exists($path)) {
Storage::makeDirectory($path);
}
// 生成唯一文件名
if (!$filename) {
$filename = date('Y-m-d_His') . '.html';
}
$fullPath = $path . '/' . $filename;
// 保存文件
$saved = Storage::put($fullPath, $content);
if ($saved) {
return [
'success' => true,
'path' => $fullPath,
'url' => Storage::url($fullPath),
'size' => Storage::size($fullPath),
];
}
}); return [
'success' => false,
'error' => 'Failed to save file',
];
} catch (\Exception $e) {
Log::error('Save to storage error: ' . $e->getMessage());
return [
'success' => false,
'error' => $e->getMessage(),
];
}
} }
// 记录日志 // 记录日志
protected function log($msg) { protected function log($msg)
{
file_put_contents($this->logFile, trim($msg) . "\n", FILE_APPEND); file_put_contents($this->logFile, trim($msg) . "\n", FILE_APPEND);
} }
......
<?php
namespace App\Console\Commands;
use App\Models\DealRecord;
use App\Models\DealRecordGoods;
use App\Models\DealRecordUser;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Overtrue\Pinyin\Pinyin;
class BatchGetDealRecordOrderNo extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'batch:get:dealRecord:orderNo';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = '批量获取交易记录订单号';

    // Target file for log(); empty by default and never assigned in this class.
    protected $logFile = '';

    // Not assigned inside this class; handle() works with local model instances.
    protected $dealRecordModel;
    protected $dealRecordUserModel;
    protected $dealRecordGoodsModel;

    /**
     * Execute the console command.
     *
     * Reads "217.txt" line by line. Each line is split on single spaces; the first
     * token is used as deal_record_no and the second as goods_name. For every
     * DealRecordUser row with that number, the matching DealRecordGoods row is
     * looked up and its order_no is dumped.
     *
     * Blank lines are skipped and lines with fewer than two tokens are reported.
     * The progress counter advances once per input line.
     *
     * @return int 0 on success, 1 when the input file cannot be opened
     */
    public function handle() {
        $dealRecordUserModel = new DealRecordUser();
        $dealRecordGoodsModel = new DealRecordGoods();

        $fileName = '217.txt';
        $fp = is_readable($fileName) ? fopen($fileName, "r") : false;
        if ($fp === false) {
            $this->error('Cannot open input file: ' . $fileName);
            return 1;
        }

        $line = 0;
        try {
            // Looping on fgets() avoids processing the extra false read that
            // a feof()-driven loop produces at end of file.
            while (($row = fgets($fp)) !== false) {
                $line++;
                var_dump($line);

                $row = trim($row);
                if ($row === '') {
                    continue;
                }

                $data = explode(" ", $row);
                if (count($data) < 2) {
                    $this->info($line . '行: 格式错误');
                    continue;
                }

                // Fetch the student rows for this deal record number first.
                $dealRecordUserInfo = $dealRecordUserModel->selectDataWithField(['deal_record_no' => $data[0]], ['*']);
                if (empty($dealRecordUserInfo)) {
                    $this->info($line . '行: 单号不存在');
                    continue;
                }

                foreach ($dealRecordUserInfo as $userInfo) {
                    // Match the goods row of this deal record by student and goods name.
                    $dealRecordGoodsInfo = $dealRecordGoodsModel->findDataWithField([
                        'deal_record_user_id' => $userInfo['id'],
                        'goods_name' => $data[1],
                    ]);
                    if (empty($dealRecordGoodsInfo)) {
                        continue;
                    }
                    var_dump($dealRecordGoodsInfo['order_no']);
                }
            }
        } finally {
            fclose($fp);
        }

        return 0;
    }

    /**
     * Append one trimmed line to the file named by $this->logFile.
     *
     * NOTE(review): $logFile defaults to '' and nothing in this class assigns it;
     * set it before calling this method.
     *
     * @param string $msg
     */
    protected function log($msg) {
        file_put_contents($this->logFile, trim($msg) . "\n", FILE_APPEND);
    }

    /**
     * Build an order number from a user name and a creation timestamp.
     *
     * The result is the pinyin abbreviation of the name, followed by the timestamp
     * with "-", " " and ":" removed, followed by a random integer between
     * 1000000000 and 2000000000.
     *
     * @param string $userName
     * @param string $createdTime
     * @return string
     */
    protected function generateOrderNo($userName, $createdTime) {
        $userNamePinyin = Pinyin::abbr($userName)->join('');
        $cleaned = str_replace(['-', ' ', ':'], '', $createdTime);
        $cleaned .= mt_rand(1000000000, 2000000000);

        return $userNamePinyin . $cleaned;
    }
}
<?php
namespace App\Console\Commands;
use App\Models\DealRecord;
use App\Models\DealRecordGoods;
use App\Models\DealRecordUser;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
class BatchUpdateDealRecordCourt extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'batch:update:dealRecord:court';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = '批量更新法院状态';

    // Target file for log(); empty by default and never assigned in this class.
    protected $logFile = '';

    // Model instances created in handle().
    protected $dealRecordModel;
    protected $dealRecordUserModel;
    protected $dealRecordGoodsModel;

    /**
     * Execute the console command.
     *
     * For every non-deleted yh_deal_record row whose contain_court is '', collects the
     * distinct is_court values of its DealRecordUser rows and writes them back into
     * contain_court, joined with "%%".
     *
     * @return int
     */
    public function handle() {
        $this->dealRecordModel = new DealRecord();
        $this->dealRecordUserModel = new DealRecordUser();
        $this->dealRecordGoodsModel = new DealRecordGoods();

        $line = 1;

        $dealRecordWhere = [
            'deleted_at' => null,
            'contain_court' => '',
        ];
        $dealRecordORM = DB::table('yh_deal_record')->where($dealRecordWhere);

        $dealRecordORM->chunkById(100, function($list) use(&$line) {
            foreach ($list->toArray() as $dealRecordInfo) {
                $dealRecordInfo = (array)$dealRecordInfo;

                $updateData = [
                    'contain_court' => '',
                ];

                // Print the progress counter. Keep the current value so a failure message
                // names this row rather than the next one.
                $currentLine = $line;
                var_dump($currentLine);
                $line++;

                // Load the student rows of this deal record.
                $dealUserWhere = ['deal_record_id' => $dealRecordInfo['id']];
                $dealUserInfo = $this->dealRecordUserModel->selectDataWithField($dealUserWhere, ['*']);

                $allCourt = [];
                foreach ($dealUserInfo as $dealUser) {
                    $allCourt[] = $dealUser['is_court'];
                }
                $allCourt = array_unique($allCourt);
                if (!empty($allCourt)) {
                    $updateData['contain_court'] = implode('%%', $allCourt);
                }

                $updateRes = $this->dealRecordModel->updateData(['id' => $dealRecordInfo['id']], $updateData);
                if (false === $updateRes) {
                    var_dump('第' . $currentLine . '行更新失败');
                }
            }
        });

        return 0;
    }

    /**
     * Append one trimmed line to the file named by $this->logFile.
     *
     * NOTE(review): $logFile defaults to '' and nothing in this class assigns it;
     * set it before calling this method.
     *
     * @param string $msg
     */
    protected function log($msg) {
        file_put_contents($this->logFile, trim($msg) . "\n", FILE_APPEND);
    }
}
<?php
namespace App\Console\Commands;
use App\Models\DealRecord;
use App\Models\DealRecordGoods;
use App\Models\DealRecordUser;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Overtrue\Pinyin\Pinyin;
class BatchUpdateDealRecordOrderNo extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'batch:update:dealRecord:orderNo';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = '批量更新交易记录订单号';

    // Target file for log(); empty by default and never assigned in this class.
    protected $logFile = '';

    // Model instances created in handle().
    protected $dealRecordModel;
    protected $dealRecordUserModel;
    protected $dealRecordGoodsModel;

    /**
     * Execute the console command.
     *
     * For every non-deleted yh_deal_record row whose contain_order_no is '', walks its
     * DealRecordGoods rows, generates and stores an order number for each row whose
     * order_no is '', then writes the distinct order numbers back into
     * contain_order_no, joined with "%%".
     *
     * @return int
     */
    public function handle() {
        $this->dealRecordModel = new DealRecord();
        $this->dealRecordUserModel = new DealRecordUser();
        $this->dealRecordGoodsModel = new DealRecordGoods();

        $line = 1;

        $dealRecordWhere = [
            'deleted_at' => null,
            'contain_order_no' => '',
        ];
        $dealRecordORM = DB::table('yh_deal_record')->where($dealRecordWhere);

        $dealRecordORM->chunkById(100, function($list) use(&$line) {
            foreach ($list->toArray() as $dealRecordInfo) {
                $dealRecordInfo = (array)$dealRecordInfo;

                $updateData = [
                    'contain_order_no' => '',
                ];

                // Print the progress counter. Keep the current value so failure messages
                // name this row rather than the next one.
                $currentLine = $line;
                var_dump($currentLine);
                $line++;

                // Load the goods rows of this deal record.
                $dealGoodsWhere = ['deal_record_id' => $dealRecordInfo['id']];
                $dealGoodsInfo = $this->dealRecordGoodsModel->selectDataWithField($dealGoodsWhere, ['*']);

                $allOrderNo = [];
                foreach ($dealGoodsInfo as $dealGoods) {
                    if ($dealGoods['order_no'] !== '') {
                        $allOrderNo[] = $dealGoods['order_no'];
                        continue;
                    }

                    // No order number yet: generate one and store it on the goods row.
                    $newOrderNo = $this->generateOrderNo($dealRecordInfo['user_name'], $dealRecordInfo['created_at']);
                    $updateRes = $this->dealRecordGoodsModel->updateData(['id' => $dealGoods['id']], ['order_no' => $newOrderNo]);
                    if (false === $updateRes) {
                        var_dump('第' . $currentLine . '行抵扣信息更新失败');
                        continue;
                    }
                    $allOrderNo[] = $newOrderNo;
                }

                $allOrderNo = array_unique($allOrderNo);
                if (!empty($allOrderNo)) {
                    $updateData['contain_order_no'] = implode('%%', $allOrderNo);
                }

                $updateRes = $this->dealRecordModel->updateData(['id' => $dealRecordInfo['id']], $updateData);
                if (false === $updateRes) {
                    var_dump('第' . $currentLine . '行更新失败');
                }
            }
        });

        return 0;
    }

    /**
     * Append one trimmed line to the file named by $this->logFile.
     *
     * NOTE(review): $logFile defaults to '' and nothing in this class assigns it;
     * set it before calling this method.
     *
     * @param string $msg
     */
    protected function log($msg) {
        file_put_contents($this->logFile, trim($msg) . "\n", FILE_APPEND);
    }

    /**
     * Build an order number from a user name and a creation timestamp.
     *
     * The result is the pinyin abbreviation of the name, followed by the timestamp
     * with "-", " " and ":" removed, followed by a random integer between
     * 1000000000 and 2000000000.
     *
     * @param string $userName
     * @param string $createdTime
     * @return string
     */
    protected function generateOrderNo($userName, $createdTime) {
        $userNamePinyin = Pinyin::abbr($userName)->join('');
        $cleaned = str_replace(['-', ' ', ':'], '', $createdTime);
        $cleaned .= mt_rand(1000000000, 2000000000);

        return $userNamePinyin . $cleaned;
    }
}
<?php
namespace App\Console\Commands;
use App\Models\DealRecord;
use App\Models\DealRecordGoods;
use App\Models\DealRecordUser;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use App\Models\School;
use App\Models\AreaProvinceModel;
use App\Models\SchoolSpecial;
use App\Models\SchoolSpecialDetail;
use App\Models\SchoolProvincescore;
use App\Models\SchoolMajorScore;
use Illuminate\Support\Facades\Http;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Writer\Xlsx;
use PhpOffice\PhpSpreadsheet\Style\Alignment;
use Symfony\Component\HttpFoundation\StreamedResponse;
class BatchUpdateDealRecordStatus extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'batch:update:dealRecord:status {begin} {end}';
/**
* The console command description.
*
* @var string
*/
protected $description = '批量更新交易记录状态';
protected $logFile = '';
protected $dealRecordModel;
protected $dealRecordUserModel;
protected $dealRecordGoodsModel;
/**
 * Execute the console command.
 *
 * For every School row with id in [begin, end) whose level_name starts with "本科",
 * downloads the school's per-province year index from static-data.gaokao.cn. For each
 * province that lists the year 2022 it then downloads that year's major score table
 * and stores the rows through SchoolMajorScore::insertMultiData().
 *
 * Request and parsing failures are printed and the loop moves on; they do not abort the run.
 *
 * @return int
 */
public function handle()
{
    $begin = (int)$this->argument('begin');
    $end = (int)$this->argument('end');
    dump($begin, $end);

    $school = new School();
    $SchoolMajorScore = new SchoolMajorScore();

    $school::select(['id', 'school_id'])
        ->where([['id', '>=', $begin], ['id', '<', $end], ['level_name', 'like', "本科%"]])
        ->chunkById(10, function ($list) use ($SchoolMajorScore) {
            foreach ($list->toArray() as $schoolRow) {
                $schoolId = $schoolRow['school_id'];
                $url = '';

                try {
                    // Index of the years available per province for this school.
                    $url = "https://static-data.gaokao.cn/www/2.0/school/{$schoolId}/dic/professionalscore.json?a=www.gaokao.cn";
                    $response = Http::withHeaders([
                        'Content-Type' => 'application/json',
                        'authority' => 'api.zjzw.cn',
                    ])->get($url, []);

                    // json() yields null when the body is not JSON, so guard before indexing.
                    $bodyData = $response->json();
                    if (!is_array($bodyData) || ($bodyData['code'] ?? null) != '0000') {
                        continue;
                    }
                    if (empty($bodyData['data'])) {
                        continue;
                    }
                    $yearDataRePid = $bodyData['data']['newsdata']['year'] ?? [];
                } catch (\Exception $e) {
                    echo $url . ' ';
                    continue;
                }

                foreach ($yearDataRePid as $provinceid => $years) {
                    foreach ($years as $year) {
                        // Only the 2022 tables are imported.
                        if ($year != 2022) {
                            continue;
                        }

                        try {
                            // Pause before each score-table request.
                            sleep(3);
                            $url = "https://static-data.gaokao.cn/www/2.0/schoolspecialscore/{$schoolId}/{$year}/{$provinceid}.json?a=www.gaokao.cn";
                            $response = Http::withHeaders([
                                'Content-Type' => 'application/json',
                                'authority' => 'api.zjzw.cn',
                            ])->get($url, []);

                            $bodyData = $response->json();
                            if (!is_array($bodyData) || ($bodyData['code'] ?? null) != '0000') {
                                dump($url, $bodyData);
                                continue;
                            }
                            if (empty($bodyData['data'])) {
                                continue;
                            }

                            foreach ($bodyData['data'] as $k => $v) {
                                // Keys are "_"-separated; only entries whose third part equals 0 are kept.
                                $ex = explode('_', $k);
                                if ($ex[2] != 0) {
                                    continue;
                                }

                                $iData = [];
                                foreach ($v['item'] as $idv) {
                                    $iData[] = [
                                        'school_id' => $schoolId,
                                        'year' => $year,
                                        'province_id' => $provinceid,
                                        'local_type_id' => $idv['type'],
                                        'local_batch_id' => $idv['batch'],
                                        'min' => $idv['min'],
                                        'min_section' => $idv['min_section'],
                                        'remark' => $idv['sp_fxk'] . '-' . $idv['sp_sxk'],
                                        'sp_info' => $idv['sp_info'],
                                        'sp_name' => $idv['sp_name'],
                                        'special_id' => $idv['special_id'],
                                        'spe_id' => $idv['spe_id'],
                                        'level1_name' => $idv['level1_name'],
                                        'level2_name' => $idv['level2_name'],
                                        'level3_name' => $idv['level3_name'],
                                        'all_data' => json_encode($idv, JSON_UNESCAPED_UNICODE),
                                    ];
                                }

                                // Do not issue an insert for an empty batch.
                                if (!empty($iData)) {
                                    $SchoolMajorScore->insertMultiData($iData);
                                }
                            }
                        } catch (\Exception $e) {
                            echo $url . ' ';
                            dump($e->getMessage());
                            // Longer pause after a failure before the next request.
                            sleep(10);
                        }
                    }
                }

                echo $schoolRow['id'] . ' ';
            }
        });

    return 0;
}
/**
 * Export stored major scores of province_id 12 to Excel files, one file per chunk of 1000 rows.
 *
 * Rows are de-duplicated within each chunk by year, school, province, special id and the
 * "info" field of the stored JSON. Each chunk is written to
 * storage/app/users{chunk}_{rowNumber}.xlsx. The chunk number is part of the name because
 * the row counter restarts for every chunk, so chunks with the same number of rows would
 * otherwise overwrite each other's file.
 *
 * @return int
 */
public function handle66999()
{
    $begin = (int)$this->argument('begin');
    $end = (int)$this->argument('end');
    dump($begin, $end);

    $school = new School();
    $schoolData = $school->selectDataWithField([['level_name', 'like', "本科%"]], ['school_id','name']);
    // school_id => school name lookup.
    $schoolDataReIds = array_column($schoolData, 'name', 'school_id');
    $SchoolMajorScore = new SchoolMajorScore();

    // chunkById() passes the 1-based chunk number as the second callback argument.
    $SchoolMajorScore::select(['*'])->where([['province_id', '=', 12]])->chunkById(1000, function ($list, $page) use ($schoolDataReIds) {
        $spreadsheet = new Spreadsheet();
        $sheet = $spreadsheet->getActiveSheet();

        // Header row, column widths and alignment are identical for every data row,
        // so they are set once per sheet.
        $headers = [
            'A1' => '招生学校',
            'B1' => '地区',
            'C1' => '招生年份',
            'D1' => '招生类型',
            'E1' => '专业大类',
            'F1' => '专业大类',
            'G1' => '最低分数',
            'H1' => '最低位次',
            'I1' => '选科要求',
            'J1' => '招生专业',
            'K1' => '备注',
        ];
        foreach ($headers as $cell => $title) {
            $sheet->setCellValue($cell, $title);
        }

        $widths = [
            'A' => 20, 'B' => 10, 'D' => 10, 'E' => 10, 'F' => 20,
            'G' => 10, 'H' => 10, 'I' => 20, 'J' => 20, 'K' => 20,
        ];
        foreach ($widths as $column => $width) {
            $sheet->getColumnDimension($column)->setWidth($width);
        }

        $styleArray = [
            'alignment' => [
                'horizontal' => Alignment::HORIZONTAL_CENTER,
                'vertical' => Alignment::VERTICAL_CENTER,
            ],
        ];
        foreach (range('A', 'I') as $column) {
            $sheet->getStyle($column)->applyFromArray($styleArray);
        }

        $rowNumber = 2;
        // Dedup keys already written in this chunk, stored as array keys for O(1) lookup.
        $seen = [];

        foreach ($list->toArray() as $scoreRow) {
            $jsonData = json_decode($scoreRow['all_data'], true);
            $year = $scoreRow['year'] ?? "";

            // The crawled data contains duplicates. The parentheses matter: "." binds
            // tighter than "??", so without them the fallback never applied.
            $md5 = md5(
                $year . '_' . $scoreRow['school_id'] . '_' . $scoreRow['province_id'] . '_'
                . $scoreRow['special_id'] . ($jsonData['info'] ?? '')
            );
            if (isset($seen[$md5])) {
                continue;
            }
            $seen[$md5] = true;

            // Prefer the school id from the stored JSON; fall back to the row's own column.
            $schoolKey = $jsonData['school_id'] ?? $scoreRow['school_id'];
            $schoolName = $schoolDataReIds[$schoolKey] ?? "";

            // Fill one data row.
            $sheet->setCellValue('A' . $rowNumber, $schoolName);
            $sheet->setCellValue('B' . $rowNumber, '天津');
            $sheet->setCellValue('C' . $rowNumber, $year);
            $sheet->setCellValue('D' . $rowNumber, $jsonData['level1_name'] ?? '');
            $sheet->setCellValue('E' . $rowNumber, $jsonData['level2_name'] ?? '');
            $sheet->setCellValue('F' . $rowNumber, $jsonData['level3_name'] ?? '');
            $sheet->setCellValue('G' . $rowNumber, $jsonData['min'] ?? '');
            $sheet->setCellValue('H' . $rowNumber, $jsonData['min_section'] ?? '');
            $sheet->setCellValue('I' . $rowNumber, $jsonData['sg_info'] ?? '');
            $sheet->setCellValue('J' . $rowNumber, $jsonData['sp_name'] ?? '');
            $sheet->setCellValue('K' . $rowNumber, $jsonData['info'] ?? '');
            $rowNumber++;
            echo $rowNumber . ' ';
        }

        echo $rowNumber . ' ';
        $writer = new Xlsx($spreadsheet);
        $filePath = storage_path("app/users{$page}_{$rowNumber}.xlsx");
        $writer->save($filePath);
    });

    return 0;
}
/**
 * Download the detail record of every SchoolSpecial row with id in [begin, end)
 * and store it through SchoolSpecialDetail.
 *
 * Rows that already have a SchoolSpecialDetail entry for the same school_id and sp_id
 * are skipped. The request is made inside the try block, as in handle(), so a single
 * failed request is reported and skipped instead of aborting the run.
 */
public function handle4445()
{
    $begin = (int)$this->argument('begin');
    $end = (int)$this->argument('end');
    dump($begin, $end);

    $schoolSpecial = new SchoolSpecial();
    $schoolSpecialDetail = new SchoolSpecialDetail();

    $schoolSpecial->where([['id', '>=', $begin], ['id', '<', $end]])->chunkById(50, function ($list) use ($schoolSpecialDetail) {
        foreach ($list->toArray() as $specialRow) {
            $schoolId = $specialRow['school_id'];
            $spId = $specialRow['sp_id'];

            // Skip rows whose detail has already been stored.
            $findRet = $schoolSpecialDetail->findData(['school_id' => $schoolId, 'sp_id' => $spId]);
            if (!empty($findRet)) {
                continue;
            }

            $url = "https://static-data.gaokao.cn/www/2.0/school/" . $schoolId . "/special/" . $spId . ".json?a=www.gaokao.cn";

            try {
                $response = Http::withHeaders([
                    'Content-Type' => 'application/json',
                    'authority' => 'api.zjzw.cn',
                ])->get($url, []);

                // json() yields null when the body is not JSON, so guard before indexing.
                $bodyData = $response->json();
                if (!is_array($bodyData) || ($bodyData['code'] ?? null) != '0000') {
                    dump($spId, $bodyData);
                    continue;
                }
                if (!isset($bodyData['data'])) {
                    dump('该学校无专业信息:' . $schoolId . '-' . $spId);
                    continue;
                }

                $specialDataB = $bodyData['data'];
                $tmp = [
                    'sp_id' => $specialDataB['id'],
                    'school_id' => $specialDataB['school_id'],
                    'special_id' => $specialDataB['special_id'],
                    'content' => $specialDataB['content'] ?? '<h3>专业介绍</h3>',
                    'level1_name' => $specialDataB['level1_name'],
                    'level2_name' => $specialDataB['level2_name'],
                    'level3_name' => $specialDataB['level3_name'],
                    'all_data' => json_encode($specialDataB, JSON_UNESCAPED_UNICODE),
                ];

                // NOTE(review): a single associative row is passed here, while handle() passes
                // a list of rows to insertMultiData(); confirm the method accepts both shapes.
                $schoolSpecialDetail->insertMultiData($tmp);
            } catch (\Exception $e) {
                dump($e->getMessage());
                dump($schoolId . '-' . $spId . '-' . $specialRow['id']);
                continue;
            }

            echo $schoolId . '-' . $spId . '-' . $specialRow['id'] . ' ';
        }

        // Pause between chunks.
        sleep(5);
    });
}
/**
 * Import the major lists of every School row with id > 2473 whose level_name starts with "本科".
 *
 * For each school the pc_special.json document is downloaded. Entries 1 and 2 of its
 * special_detail section are each turned into SchoolSpecial rows and inserted as one batch
 * per entry. The distinct level3_name values seen across both entries, together with
 * entry 3 of special_detail, are then written back onto the school row.
 */
public function handle99()
{
    $school = new School();
    $schoolSpecial = new SchoolSpecial();

    $school->where([['id', '>', 2473], ['level_name', 'like', "本科%"]])->chunkById(3, function ($list) use ($schoolSpecial, $school) {
        // Fields copied one-to-one from a source entry into the inserted row,
        // in the order they appear in the row.
        $copiedFields = [
            'school_id', 'special_id', 'special_name', 'special_type', 'type_name',
            'nation_feature', 'province_feature', 'nation_first_class', 'is_important',
            'limit_year', 'year', 'level3_weight', 'xueke_rank', 'ruanke_level',
            'level3_name', 'level3_code', 'level2_name', 'level2_id', 'level2_code',
        ];

        foreach ($list->toArray() as $schoolRow) {
            $schoolId = $schoolRow['sid'];
            $url = "https://static-data.gaokao.cn/www/2.0/school/" . $schoolId . "/pc_special.json?a=www.gaokao.cn";
            $response = Http::withHeaders([
                'Content-Type' => 'application/json',
                'authority' => 'api.zjzw.cn',
            ])->get($url, []);
            $bodyData = $response->json();

            if ($bodyData['code'] != '0000') {
                dump($schoolId, $bodyData);
                continue;
            }

            // Both entry 1 and entry 2 must be present, otherwise the school is skipped.
            $detail = $bodyData['data']['special_detail'] ?? [];
            if (!isset($detail[1], $detail[2])) {
                dump('该学校无专业信息:' . $schoolId);
                continue;
            }

            $special_all_type = [];
            foreach ([$detail[1], $detail[2]] as $group) {
                if (empty($group)) {
                    continue;
                }

                $rows = [];
                foreach ($group as $value) {
                    // Collect each level3_name once, in first-seen order.
                    if (!in_array($value['level3_name'], $special_all_type)) {
                        $special_all_type[] = $value['level3_name'];
                    }

                    $row = ['sp_id' => $value['id']];
                    foreach ($copiedFields as $field) {
                        $row[$field] = $value[$field];
                    }
                    $row['all_data'] = json_encode($value, JSON_UNESCAPED_UNICODE);
                    $rows[] = $row;
                }
                $schoolSpecial->insertMultiData($rows);
            }

            // Write the collected type names and entry 3 back onto the school.
            $updateD = [
                'special_all_type' => json_encode($special_all_type, JSON_UNESCAPED_UNICODE),
                'special_data' => json_encode($detail[3] ?? [], JSON_UNESCAPED_UNICODE),
            ];
            $school->updateData(['sid' => $schoolId], $updateD);
            echo $schoolId . ' ';
        }
    });
}
/**
 * Backfill the `name` column of every province row from the school table.
 *
 * Walks the province table in chunks of 100. For each province it looks up one
 * school row with the same province_id and copies that row's province_name.
 * Provinces without a matching school are left untouched instead of having
 * their name overwritten with null.
 *
 * @return void
 */
public function handle444()
{
    $school = new School();
    $areaProvinceModel = new AreaProvinceModel();

    $areaProvinceModel->chunkById(100, function ($list) use ($areaProvinceModel, $school) {
        foreach ($list->toArray() as $province) {
            $match = $school->findDataWithField(
                ['province_id' => $province['id']],
                ['province_id', 'province_name']
            );

            // Only update when a school actually supplied a name for this province.
            if (!empty($match) && isset($match['province_name'])) {
                $areaProvinceModel->updateData(
                    ['id' => $province['id']],
                    ['name' => $match['province_name']]
                );
            }

            // Progress marker: one id per processed province.
            echo $province['id'] . ' ';
        }
    });
}
/**
 * Download every school's logo and store its local path on the school row.
 *
 * Iterates the school table in chunks of 100, fetches
 * https://static-data.gaokao.cn/upload/logo/{sid}.jpg through downloadImage()
 * into ./public/uploads/logo, and writes "/uploads/logo/<file>" to the `logo` column.
 *
 * NOTE(review): the first failed download stops the whole run via dd(); that
 * behaviour is kept as-is.
 *
 * @return void
 */
public function handle222()
{
    $school = new School();

    // The closure previously captured an undefined $line by reference; it was never used.
    $school->chunkById(100, function ($list) use ($school) {
        foreach ($list->toArray() as $userInfo) {
            $sid = $userInfo['sid'];
            $url = 'https://static-data.gaokao.cn/upload/logo/' . $sid . '.jpg';

            $result = $this->downloadImage($url, './public/uploads/logo', $sid);
            if (!$result['success']) {
                dd($result, $userInfo);
            }

            $school->updateData(
                ['id' => $userInfo['id']],
                ['logo' => '/uploads/logo/' . $result['url']]
            );
            echo $userInfo['id'] . ' ';
        }
    });
}
/**
 * Download a remote image into a local directory.
 *
 * Steps: validate the URL, probe it with get_headers(), require a 200 status and an
 * image Content-Type, create the target directory if needed, download the bytes,
 * write them to disk and finally verify the saved file with getimagesize().
 *
 * get_headers() follows redirects: in associative mode every hop contributes one
 * status line under a numeric key, and headers sent more than once become arrays.
 * The status and Content-Type of the final response are therefore used.
 *
 * @param string      $url      Remote image URL.
 * @param string      $saveDir  Local directory to save into (created when missing).
 * @param string|null $fileName Optional file name without extension; characters other
 *                              than letters, digits, "_" and "-" are removed. When it is
 *                              empty (also after sanitising) a unique name is generated.
 * @return array On success: ['success' => true, 'path', 'size', 'url' (file base name)].
 *               On failure: ['success' => false, 'error' => message].
 */
function downloadImage($url, $saveDir, $fileName = null)
{
    try {
        // Validate the URL format.
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return [
                'success' => false,
                'error' => '无效的URL格式'
            ];
        }

        // Probe reachability.
        $headers = @get_headers($url, 1);
        if (!$headers) {
            return [
                'success' => false,
                'error' => '无法访问远程资源'
            ];
        }

        // Normalise the header list: keep the last status line (final hop) and
        // index named headers case-insensitively, taking the last value of repeats.
        $statusLine = '';
        $namedHeaders = [];
        foreach ($headers as $headerName => $headerValue) {
            if (is_int($headerName)) {
                $statusLine = (string) $headerValue;
                continue;
            }
            if (is_array($headerValue)) {
                $headerValue = $headerValue ? end($headerValue) : '';
            }
            $namedHeaders[strtolower($headerName)] = (string) $headerValue;
        }
        if (strpos($statusLine, '200') === false) {
            return [
                'success' => false,
                'error' => '无法访问远程资源'
            ];
        }

        // The resource must advertise an image content type.
        $contentType = $namedHeaders['content-type'] ?? '';
        if (!preg_match('/image\/(jpeg|png|gif|webp|bmp)/i', $contentType)) {
            return [
                'success' => false,
                'error' => '远程资源不是有效的图片类型'
            ];
        }

        // Create the target directory when it does not exist yet.
        if (!file_exists($saveDir)) {
            if (!mkdir($saveDir, 0755, true)) {
                return [
                    'success' => false,
                    'error' => '无法创建保存目录'
                ];
            }
        }

        // Build the file name; the extension comes from the URL path, defaulting to jpg.
        $ext = pathinfo((string) parse_url($url, PHP_URL_PATH), PATHINFO_EXTENSION) ?: 'jpg';
        $cleanExt = strtolower(preg_replace('/[^a-zA-Z0-9]/', '', $ext)) ?: 'jpg';
        $baseName = '';
        if ($fileName !== null && $fileName !== '') {
            $baseName = (string) preg_replace('/[^a-zA-Z0-9_-]/', '', (string) $fileName);
        }
        if ($baseName === '') {
            // No usable custom name: generate a unique one instead of producing ".jpg".
            $baseName = md5(uniqid() . microtime(true));
        }
        $saveName = $baseName . '.' . $cleanExt;
        $savePath = rtrim($saveDir, '/') . '/' . $saveName;

        // Download the bytes.
        $imageData = @file_get_contents($url);
        if ($imageData === false) {
            return [
                'success' => false,
                'error' => '下载图片内容失败'
            ];
        }

        // Persist to disk (an empty payload counts as a failure, as before).
        if (!file_put_contents($savePath, $imageData)) {
            return [
                'success' => false,
                'error' => '无法保存文件到目录'
            ];
        }

        // Second check: the saved file must really be an image.
        if (!@getimagesize($savePath)) {
            @unlink($savePath);
            return [
                'success' => false,
                'error' => '下载的文件不是有效图片'
            ];
        }

        return [
            'success' => true,
            'path' => $savePath,
            'size' => filesize($savePath),
            'url' => basename($savePath)
        ];
    } catch (\Exception $e) {
        return [
            'success' => false,
            'error' => $e->getMessage()
        ];
    }
}
}
<?php
namespace App\Console\Commands;
use App\Models\School;
use App\Models\SchoolPlan;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Overtrue\Pinyin\Pinyin;
use Illuminate\Support\Facades\Http;
use App\Models\AreaProvinceModel;
use App\Models\SchoolProvincescore;
use App\Models\MajorCategory;
use App\Models\Major;
use App\Models\SchoolPlanBefore;
use App\Models\MajorReSchool;
use App\Models\SchoolMajorScore;
use App\Models\MajorDetail;
class UpdateCourseCode extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'test:data {begin} {end}';
protected $categoryModel;
protected $courseModel;
protected $orderSubModel;
/**
* The console command description.
*
* @var string
*/
protected $description = '爬数据,';
/**
* Base URL prepended to locally stored upload paths when image links are rewritten.
*
* @var string
*/
public static $domain = 'https://gaozhaoapi.eoffcn.com';
/**
* Crawl the 2025 per-province enrolment plan of every undergraduate school in an id range.
*
* Reads the `begin` / `end` command arguments (half-open id range), loads a dictionary
* from dicprovince/dic.json and the local province names, then for each matching school:
* 1. fetches dic/specialplan.json to learn which province ids ("pids") have plan data;
* 2. for every province not yet stored for year 2025, fetches
*    schoolspecialplan/{school}/{year}/{pid}.json and bulk-inserts the items of every
*    group whose key has 0 as its third "_"-separated segment.
* Failures for one school/province are printed and skipped; a bad dictionary response
* stops the command through dd().
*
* @return void
*/
public function handle(){
// Admission-brochure list endpoint (kept for reference): https://static-data.gaokao.cn/www/2.0/school/140/news/list.json?a=www.gaokao.cn
$begin = (int)$this->argument('begin');
$end = (int)$this->argument('end');
dump($begin, $end);
// Dictionary used below to translate type ids and batch ids into display names.
$url = "https://static-data.gaokao.cn/www/2.0/config/dicprovince/dic.json?a=www.gaokao.cn";
$response = Http::withHeaders([
'Content-Type' => 'application/json',
'authority' => 'api.zjzw.cn',
])->get($url, []);
// Decode the JSON response body.
$bodyData = $response->json();
if ($bodyData['code'] != '0000') {
dd($bodyData);
}
$batchData = $bodyData['data'];
$areaProvinceModel = new AreaProvinceModel();
$pdata = $areaProvinceModel->selectDataWithField(['country' => 1], ['id', 'name']);
// Map province id => province name.
$provinceData = array_column($pdata, 'name', 'id');
$School = new School();
$SchoolPlanBefore = new SchoolPlanBefore();
$filed = ['id','school_id'];
// Only schools whose level_name starts with "本科" are crawled, 10 rows per chunk.
$School::select($filed)->where([['id', '>=', $begin], ['id','<', $end], ['level_name', 'like', "本科%"]])->chunkById(10, function ($list) use (&$SchoolPlanBefore, &$batchData, &$provinceData) {
$userList = $list->toArray();
foreach ($userList as $v) {
$schoolId = $v['school_id'];
try {
// Search dictionary for this school's plans; "pids" lists the provinces to fetch.
$url = "https://static-data.gaokao.cn/www/2.0/school/{$schoolId}/dic/specialplan.json?a=www.gaokao.cn";
$response = Http::withHeaders([
'Content-Type' => 'application/json',
'authority' => 'api.zjzw.cn',
])->get($url, []);
// Decode the JSON response body.
$bodyData = $response->json();
if ($bodyData['code'] != '0000') {
continue;
}
if (!isset($bodyData['data']) || empty($bodyData['data'])) {
continue;
}
$provinceDataRePid = $bodyData['data']['pids'];
if(empty($provinceDataRePid)){
continue;
}
} catch (\Exception $e) {
// Print the failing URL and move on to the next school.
echo $url . ' ';
continue;
}
foreach ($provinceDataRePid as $pki => $pid){
// Skip provinces that already have 2025 rows for this school.
$ret = $SchoolPlanBefore->findDataWithField(['school_id'=>$schoolId,'year'=>2025,'province_id'=>$pid], ['id']);
if(!empty($ret)){
continue;
}
$year = 2025;
// Pause before each plan request.
sleep(2);
try {
$url = "https://static-data.gaokao.cn/www/2.0/schoolspecialplan/{$schoolId}/{$year}/{$pid}.json?a=www.gaokao.cn";
$response = Http::withHeaders([
'Content-Type' => 'application/json',
'authority' => 'api.zjzw.cn',
])->get($url, []);
// Decode the JSON response body.
$bodyData = $response->json();
if ($bodyData['code'] != '0000') {
dump($v['id'], $bodyData);
continue;
}
if (!isset($bodyData['data']) || empty($bodyData['data'])) {
continue;
}
foreach ($bodyData['data'] as $key => $value) {
// The data key is "_"-separated; only entries whose third segment is 0 are stored.
$ex = explode('_', $key);
$groupId = $ex[2];
if ($groupId != 0) {
continue;
}
$iData = [];
foreach ($value['item'] as $it => $item) {
$tmp = [
'school_id' => $schoolId,
'year' => $year,
'province_id' => $item['province'],
'province_name' => $provinceData[$pid] ?? "",
'local_type_id' => $item['type'],
'local_type_name' => $batchData[$item['type']] ?? "",
'local_batch_id' => $item['batch'],
'local_batch_name' => $batchData[$item['batch']] ?? "",
'num' => $item['num'],
'level1' => $item['level1'],
'level1_name' => $item['level1_name'],
'level2' => $item['level2'],
'level2_name' => $item['level2_name'],
'level3' => $item['level3'],
'level3_name' => $item['level3_name'],
'sg_info' => $item['sg_info'],
'sg_name' => $item['sg_name'],
'sg_sxk' => $item['sg_sxk'],
'sg_type' => $item['sg_type'],
'sp_name' => $item['sp_name'],
'special_group' => $item['special_group'],
'special_id' => $item['special_id'],
'spe_id' => $item['spe_id'],
// 'special_group_all' =>json_encode($special_group, JSON_UNESCAPED_UNICODE),
// Raw item kept alongside the extracted columns.
'all_data' => json_encode($item, JSON_UNESCAPED_UNICODE),
];
$iData[] = $tmp;
}
$SchoolPlanBefore->insertMultiData($iData);
}
} catch (\Exception $e) {
// Report the failing province, wait, then continue with the next one.
dump("****************************************");
dump($pid, $e->getMessage());
dump("****************************************");
sleep(2);
}
}
// Progress marker: one school row id per processed school.
echo $v['id'] . ' ';
}
});
}
/**
 * Crawl the detail page and per-school score listings of every major in an id range.
 *
 * For each Major row with id in [begin, end):
 * 1. fetch pc_special_detail.json and store it in MajorDetail;
 * 2. fetch dicspecialscore.json, whose `batch` map is keyed "{provinceId}_{year}";
 * 3. for every key with year >= 2022 and every batch, page through the
 *    gk/score/special API (20 rows per page) and bulk-insert the rows into MajorReSchool.
 *
 * A non-"0000" code from the two static endpoints stops the command through dd();
 * failed score pages are dumped and skipped. Any exception abandons the current major.
 *
 * @return void
 */
public function handle13()
{
    $begin = (int)$this->argument('begin');
    $end = (int)$this->argument('end');
    dump($begin, $end);

    $areaProvinceModel = new AreaProvinceModel();
    $pdata = $areaProvinceModel->selectDataWithField(['country' => 1], ['id', 'name']);
    // Map province id => province name.
    $provinceData = array_column($pdata, 'name', 'id');

    $Major = new Major();
    $MajorReSchool = new MajorReSchool();
    $MajorDetail = new MajorDetail();
    $filed = ['id','special_id'];

    $Major::select($filed)->where([['id', '>=', $begin], ['id','<', $end]])->chunkById(30, function ($list) use ($MajorDetail, $MajorReSchool, $provinceData) {
        $userList = $list->toArray();
        foreach ($userList as $key => $value) {
            try {
                // Major overview.
                $url = "https://static-data.gaokao.cn/www/2.0/special/{$value['special_id']}/pc_special_detail.json?a=www.gaokao.cn";
                $response = Http::withHeaders([
                    'Content-Type' => 'application/json',
                    'authority' => 'api.zjzw.cn',
                ])->get($url, []);
                $bodyData = $response->json();
                if ($bodyData['code'] != '0000') {
                    dd($value['id'], $bodyData);
                }
                $jsonData = $bodyData['data'];
                $MajorDetail->insertData(['special_id'=>$value['special_id'], 'content'=>json_encode($jsonData, JSON_UNESCAPED_UNICODE)]);

                // Search dictionary listing the province/year/batch combinations with scores.
                $url = "https://static-data.gaokao.cn/www/2.0/specialscoresort/{$value['special_id']}/dicspecialscore.json?a=www.gaokao.cn";
                $response = Http::withHeaders([
                    'Content-Type' => 'application/json',
                    'authority' => 'api.zjzw.cn',
                ])->get($url, []);
                $bodyData = $response->json();
                if ($bodyData['code'] != '0000') {
                    dd($value['id'], $bodyData);
                }
                $searchJsonData = $bodyData['data']['batch'] ?? [];
                if (empty($searchJsonData)) {
                    continue;
                }

                foreach ($searchJsonData as $yearIndex => $batchidValue) {
                    // Keys look like "{provinceId}_{year}"; malformed keys are skipped
                    // instead of raising an undefined-offset error.
                    $explode = explode('_', (string) $yearIndex);
                    $local_province_id = $explode[0];
                    $year = $explode[1] ?? '';
                    if ($year == "" || $local_province_id == "") {
                        continue;
                    }
                    if ($year < 2022) {
                        continue;
                    }
                    if (empty($batchidValue)) {
                        continue;
                    }

                    foreach ($batchidValue as $kbIndex => $local_batch_idV) {
                        $local_batch_id = $local_batch_idV['id'];
                        // $max grows once the first page reports the total row count.
                        $max = 1;
                        for ($i = 1; $i <= $max; $i++) {
                            sleep(10);
                            $localUrl = "https://api.zjzw.cn/web/api/?is_single=2&local_batch_id={$local_batch_id}&local_province_id={$local_province_id}&local_type_id=&page={$i}&province_id=&request_type=1&size=20&spe_id={$value['special_id']}&special_id={$value['special_id']}&type=&uri=apidata/api/gk/score/special&year={$year}&signsafe=30ef56fe7c548e83d4b5517381244dda" . mt_rand(100, 999);
                            $response = Http::withHeaders([
                                'Content-Type' => 'application/json',
                                'Origin' => 'https://www.gaokao.cn',
                                'Referer' => 'https://www.gaokao.cn',
                            ])->post($localUrl, []);
                            $bodyDatas = $response->json();
                            if ($bodyDatas['code'] != '0000') {
                                dump($value['id'], $bodyDatas);
                                continue;
                            }
                            // Check the page response itself; the previous code tested the
                            // dictionary response ($bodyData) here by mistake.
                            if (!isset($bodyDatas['data']) || empty($bodyDatas['data'])) {
                                dump($value['id'], $bodyDatas);
                                continue;
                            }
                            $numFound = $bodyDatas['data']['numFound'];
                            $allData = $bodyDatas['data']['item'];
                            if (empty($allData)) {
                                continue;
                            }
                            if (ceil($numFound / 20) > 1) {
                                $max = ceil($numFound / 20);
                            }

                            $inData = [];
                            foreach ($allData as $k => $v) {
                                $inData[] = [
                                    'school_id' => $v['school_id'],
                                    'special_id' => $value['special_id'],
                                    'name' => $v['name'],
                                    'year' => $year,
                                    'local_province_id' => $local_province_id,
                                    'local_province_name' => $provinceData[$local_province_id] ?? "",
                                    'local_type_name' => $v['local_type_name'],
                                    'local_batch_name' => $v['local_batch_name'],
                                    // NOTE(review): this stores the major overview ($jsonData) on
                                    // every row rather than the row itself ($v) - confirm intent.
                                    'content'=>json_encode($jsonData, JSON_UNESCAPED_UNICODE)
                                ];
                            }
                            $MajorReSchool->insertMultiData($inData);
                        }
                    }
                }
            } catch (\Exception $e) {
                dump("****************************************");
                dump($value['id'], $e->getMessage());
            }
        }
    });
}
/**
 * Import the major category tree and the majors of every leaf category.
 *
 * Loads config/special/information.json, inserts each item of `id_1` as a top-level
 * MajorCategory, then inserts each child item (from `id_{spe_id}`) as a sub-category and
 * fetches the first page (30 rows) of gkv3/special/lists for it, bulk-inserting the rows
 * into Major. Unexpected API responses stop the command through dd(); any exception
 * ends the import after being dumped.
 *
 * @return void
 */
public function handle9()
{
    try {
        $url = "https://static-data.gaokao.cn/www/2.0/config/special/information.json?a=www.gaokao.cn";
        $response = Http::withHeaders([
            'Content-Type' => 'application/json',
            'authority' => 'api.zjzw.cn',
        ])->get($url, []);
        $bodyData = $response->json();
        if ($bodyData['code'] != '0000') {
            dd(00, $bodyData);
        }
        if (!isset($bodyData['data']) || empty($bodyData['data'])) {
            dd(11, $bodyData);
        }
        $jsonData = $bodyData['data'];

        $MajorCategory = new MajorCategory();
        $Major = new Major();

        foreach ($jsonData['id_1']['item'] as $key => $value) {
            $insertId = $MajorCategory->insertData([
                'name' => $value['name'],
                'code' => $value['code'],
                'spe_id' => $value['spe_id'],
                'level1' => $value['level1'],
                'pid' => 0,
            ]);

            $childKey = "id_" . $value['spe_id'];
            if (!isset($jsonData[$childKey])) {
                continue;
            }
            $level2 = $jsonData[$childKey]['root_id'];

            foreach ($jsonData[$childKey]['item'] as $index => $item) {
                // Long pause between API calls.
                sleep(15);
                $MajorCategory->insertData([
                    'name' => $item['name'],
                    'code' => $item['code'],
                    'spe_id' => $item['spe_id'],
                    'level1' => $item['level1'],
                    'pid' => $insertId,
                ]);

                $localUrl = "https://api.zjzw.cn/web/api/?keyword=&level1=1&level2={$level2}&level3={$item['spe_id']}&page=1&size=30&sort=&uri=apidata/api/gkv3/special/lists&signsafe=0b8c62335685625f3e2e05eeb98708b5".mt_rand(100,999);
                $response = Http::withHeaders([
                    'Content-Type' => 'application/json',
                    'Origin' => 'https://www.gaokao.cn',
                    'Referer' => 'https://www.gaokao.cn',
                ])->post($localUrl, []);
                $bodyDatas = $response->json();
                if ($bodyDatas['code'] != '0000') {
                    dd($bodyDatas);
                }
                // Validate (and dump) the list response itself; the previous code tested
                // and dumped the category dictionary ($bodyData), so this guard never fired.
                if (!isset($bodyDatas['data']) || empty($bodyDatas['data'])) {
                    dd(22, $bodyDatas);
                }

                $iData = [];
                foreach ($bodyDatas['data']['item'] as $k => $z) {
                    $iData[] = [
                        'boy_rate'=>$z['boy_rate'],
                        'degree'=>$z['degree'],
                        'fivesalaryavg'=>$z['fivesalaryavg'],
                        'girl_rate'=>$z['girl_rate'],
                        'hightitle'=>$z['hightitle'],
                        'mid'=>$z['id'],
                        'level1'=>$z['level1'],
                        'level1_name'=>$z['level1_name'],
                        'level2'=>$z['level2'],
                        'level2_name'=>$z['level2_name'],
                        'level3'=>$z['level3'],
                        'level3_name'=>$z['level3_name'],
                        'limit_year'=>$z['limit_year'],
                        'name'=>$z['name'],
                        'salaryavg'=>$z['salaryavg'],
                        'spcode'=>$z['spcode'],
                        'special_id'=>$z['special_id'],
                    ];
                }
                $Major->insertMultiData($iData);
            }
        }
    } catch (\Exception $e) {
        dump("****************************************");
        dump($e->getMessage());
        dump("****************************************");
        sleep(10);
    }
}
//专业介绍: https://static-data.gaokao.cn/www/2.0/special/267/pc_special_detail.json?a=www.gaokao.cn
//排行:https://api.zjzw.cn/web/api/?is_single=2&local_province_id=11&page=1&province_id=&request_type=1&size=20&special_id=267&top_school_id=512,518&type=&uri=apidata/api/gk/special/school&signsafe=a937326294ba583f19643d4d8b7c30dd
//https://api.zjzw.cn/web/api/?is_single=2&local_province_id=11&page=1&province_id=&request_type=1&size=20&sort=xueke_rank&sorttype=asc&special_id=267&top_school_id=512,518&type=&uri=apidata/api/gk/special/school&signsafe=a66aa2fab43a2cecfdac61f69eec4733
/**
 * Mirror remote images referenced by stored brochure HTML and rewrite their links.
 *
 * Scans ./public/uploads/plan/<dir>/<file>.json. For each JSON file with an `html`
 * field, every <img> whose host is one of the target domains is downloaded into
 * ./public/uploads/planimg and its src is replaced by self::$domain . '/uploads/planimg/<name>'.
 * When at least one image was replaced, the rewritten markup is saved back into the
 * same file under `new_html`; files without replacements are left unchanged.
 *
 * @return void
 */
public function handle6()
{
    $directoryPath = './public/uploads/plan';
    $saveDir = './public/uploads/planimg';
    // Only images hosted on these domains are mirrored.
    $targetDomains = ['static-data.eol.cn', 'static-data.gaokao.cn'];

    // Create the image directory once, up front.
    if (!file_exists($saveDir)) {
        if (!mkdir($saveDir, 0755, true)) {
            dd("无法创建保存目录");
        }
    }

    $files = is_dir($directoryPath) ? scandir($directoryPath) : false;
    if ($files === false) {
        dump("directory not readable: " . $directoryPath);
        return;
    }

    // Collect HTML parser warnings internally instead of emitting them.
    libxml_use_internal_errors(true);
    $bom = pack('H*', 'EFBBBF');

    foreach ($files as $file) {
        if ($file === '.' || $file === '..') {
            continue;
        }
        $filePath = $directoryPath . '/' . $file;
        // Stray files at the top level would make scandir() fail; only descend into directories.
        if (!is_dir($filePath)) {
            continue;
        }
        $sfiles = scandir($filePath);
        if ($sfiles === false) {
            continue;
        }

        foreach ($sfiles as $json) {
            if ($json === '.' || $json === '..') {
                continue;
            }
            $jsonFilePath = $filePath . '/' . $json;
            $jsonContent = file_get_contents($jsonFilePath);
            // Strip a leading UTF-8 BOM so json_decode() accepts the content.
            $jsonContent = preg_replace("/^$bom/", '', $jsonContent);
            $jsonData = json_decode($jsonContent, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                continue;
            }
            if (empty($jsonData) || !isset($jsonData['html'])) {
                continue;
            }

            // Fully qualified: the file is namespaced and does not import DOMDocument.
            $dom = new \DOMDocument();
            // The XML encoding prolog makes libxml treat the fragment as UTF-8.
            $html = '<?xml encoding="UTF-8"><!DOCTYPE html>' . $jsonData['html'];
            $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
            libxml_clear_errors();
            $dom->encoding = 'UTF-8';

            // Number of images rewritten in this document.
            $jishu = 0;
            foreach ($dom->getElementsByTagName('img') as $img) {
                $img->removeAttribute('alt');
                $originalSrc = $img->getAttribute('src');
                $parsedUrl = parse_url($originalSrc);
                $host = $parsedUrl['host'] ?? '';
                if (!in_array($host, $targetDomains, true)) {
                    continue;
                }

                $filename = basename($originalSrc);
                $localPath = $saveDir . '/' . $filename;
                $downRet = $this->downloadWithCurl($originalSrc, $localPath);
                if (isset($downRet['error'])) {
                    dump($downRet);
                    continue;
                }

                // Point the tag at the mirrored copy.
                $basePath = '/uploads/planimg/' . $filename;
                $img->setAttribute('src', self::$domain . $basePath);
                $jishu++;
            }

            if ($jishu == 0) {
                // Nothing was replaced, so there is no need to rewrite the file.
                dump("continue: " .$jsonFilePath);
                continue;
            }

            // Serialise the document and remove the helper prolog/doctype again.
            $modifiedHtml = $dom->saveHTML();
            $modifiedHtml = str_replace(
                ['<?xml encoding="UTF-8">', '<!DOCTYPE html>'],
                '',
                $modifiedHtml
            );
            $jsonData['new_html'] = html_entity_decode($modifiedHtml, ENT_QUOTES, 'UTF-8');

            // Write back, keeping non-ASCII text readable and the JSON pretty-printed.
            file_put_contents(
                $jsonFilePath, json_encode($jsonData, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE)
            );
            dump("done: " .$jsonFilePath);
        }
    }
}
/**
 * Fetch a URL with cURL and write the response body to a local file.
 *
 * Follows up to 3 redirects, times out after 30 seconds, sends a browser-like
 * User-Agent and does not verify the TLS peer or host.
 *
 * @param string $url      Remote resource to download.
 * @param string $savePath Local path the body is written to.
 * @return array ['success' => true] on success, otherwise ['error' => message].
 */
public function downloadWithCurl($url, $savePath)
{
    $options = [
        CURLOPT_URL => $url,
        // Return the body instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        // Follow redirects, at most three of them.
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 3,
        // Overall timeout in seconds.
        CURLOPT_TIMEOUT => 30,
        // TLS verification is switched off (intended for test environments only).
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => 0,
        // Pretend to be a desktop browser.
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    ];

    $handle = curl_init();
    curl_setopt_array($handle, $options);

    // Run the transfer and collect its outcome before releasing the handle.
    $body = curl_exec($handle);
    $transportError = curl_error($handle);
    $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    if ($transportError) {
        return ['error' => "CURL错误: $transportError"];
    }
    if ($status !== 200) {
        return ['error' => "HTTP状态码: $status"];
    }

    $written = file_put_contents($savePath, $body);

    return $written ? ['success' => true] : ['error' => '文件写入失败'];
}
/**
* Crawl the admission brochures of every undergraduate school in an id range.
*
* Reads the `begin` / `end` command arguments (half-open id range). For each matching
* school it fetches news/list.json, then for every listed brochure fetches
* news/{type}/{id}.json, saves its HTML content together with a timestamp as
* ./public/uploads/plan/{school}/{school}_{id}_{type}.json and inserts a SchoolPlan row
* whose `content` is the public path of that file.
* Failed requests are dumped and skipped; an exception abandons the current school.
*
* @return void
*/
public function handle0(){
// Brochure list endpoint example: https://static-data.gaokao.cn/www/2.0/school/140/news/list.json?a=www.gaokao.cn
$begin = (int)$this->argument('begin');
$end = (int)$this->argument('end');
dump($begin, $end);
$School = new School();
$SchoolPlan = new SchoolPlan();
$filed = ['id','school_id'];
// Only schools whose level_name starts with "本科" are crawled, 30 rows per chunk.
$School::select($filed)->where([['id', '>=', $begin], ['id','<', $end], ['level_name', 'like', "本科%"]])->chunkById(30, function ($list) use (&$SchoolPlan) {
$userList = $list->toArray();
foreach ($userList as $v) {
// Pause before each school.
sleep(5);
$schoolId = $v['school_id'];
try {
$url = "https://static-data.gaokao.cn/www/2.0/school/" . $schoolId . "/news/list.json?a=www.gaokao.cn";
$response = Http::withHeaders([
'Content-Type' => 'application/json',
'authority' => 'api.zjzw.cn',
])->get($url, []);
// Decode the JSON response body.
$bodyData = $response->json();
if ($bodyData['code'] != '0000') {
dump($v['id'], $bodyData);
continue;
}
if (!isset($bodyData['data']) || empty($bodyData['data'])) {
continue;
}
// $bodyData is reassigned inside this loop; the list being iterated is not affected
// because foreach walks the array value it was given.
foreach ($bodyData['data'] as $item){
// Pause before each brochure request.
sleep(5);
// Detail endpoint example: https://static-data.gaokao.cn/www/2.0/school/140/news/68000/219989.json?a=www.gaokao.cn
$planid = $item['id'];
$type = $item['type'];
$url = "https://static-data.gaokao.cn/www/2.0/school/" . $schoolId . "/news/".$type."/".$planid.".json?a=www.gaokao.cn";
$response = Http::withHeaders([
'Content-Type' => 'application/json',
'authority' => 'api.zjzw.cn',
])->get($url, []);
// Decode the JSON response body.
$bodyData = $response->json();
if ($bodyData['code'] != '0000') {
dump($v['id'], $bodyData);
continue;
}
if (!isset($bodyData['data']) || empty($bodyData['data'])) {
continue;
}
$htmlContent = $bodyData['data']['content'];
$data = [
'html' => $htmlContent,
'timestamp' => date('Y-m-d H:i:s')
];
$jsonData = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
if ($jsonData === false) {
dump('JSON 编码错误: ' . json_last_error_msg());
continue;
}
$saveDir = './public/uploads/plan/'.$schoolId;
// Create the per-school directory when it does not exist yet.
if (!file_exists($saveDir)) {
if (!mkdir($saveDir, 0755, true)) {
dd("无法创建保存目录");
}
}
// $filePath is the on-disk location; $basePath is the public path stored in the database.
$filePath = $saveDir.'/'. $schoolId.'_'.$planid.'_'.$type.'.json';
$basePath = '/uploads/plan/'.$schoolId.'/'. $schoolId.'_'.$planid.'_'.$type.'.json';
$result = file_put_contents($filePath, $jsonData);
if ($result === false) {
dump('文件写入失败');
continue;
}
$insertData = [
'plan_id'=>$planid,
'type'=>$type,
'school_id'=>$schoolId,
'title'=>$item['title'],
'year'=>$item['year'],
'content'=>$basePath,
];
$SchoolPlan->insertData($insertData);
}
} catch (\Exception $e) {
// Report the failing school, back off, then continue with the next one.
dump("****************************************");
dump($v['id'],$e->getMessage());
dump("****************************************");
sleep(10);
}
// Progress marker: one school row id per processed school.
echo $v['id'] . ' ';
}
});
}
/**
 * Fill in the major-group details of province score rows that still lack them.
 *
 * Iterates SchoolProvincescore rows with id in [begin, end). A row is processed when it
 * has a non-empty sg_name and its special_group column is the literal "[]": the
 * gkv3/plan/school API is queried and, on code "0000", the returned data is stored as
 * JSON in special_group. Code 1069 triggers a 60-second pause; exceptions are dumped
 * and followed by a 20-second pause. Every row id is echoed as progress.
 *
 * @return void
 */
public function handle1()
{
    $begin = (int)$this->argument('begin');
    $end = (int)$this->argument('end');
    dump($begin, $end);

    $schoolProvincescore = new SchoolProvincescore();
    $filed = ['id','school_id','year','province_id','local_type_id', 'local_batch_id','special_group_id','sg_name','special_group'];
    $range = [['id', '>=', $begin], ['id', '<', $end]];

    $schoolProvincescore::select($filed)->where($range)->chunkById(30, function ($list) use (&$schoolProvincescore) {
        foreach ($list->toArray() as $v) {
            $schoolId = $v['school_id'];
            $provinceid = $v['province_id'];
            $typeid = $v['local_type_id'];
            $year = $v['year'];

            $needsGroup = !empty($v['sg_name']) && $v['special_group'] == "[]";
            if ($needsGroup) {
                sleep(1);
                try {
                    $reqData = [
                        "local_batch_id" => $v['local_batch_id'],
                        "local_province_id" => $provinceid,
                        "local_type_id" => $typeid,
                        "page" => 1,
                        "school_id" => $schoolId,
                        "signsafe" => "b264caa87b003f5903c78a51f09c7341",
                        "size" => 20,
                        "special_group" => $v['special_group_id'],
                        "uri" => "apidata/api/gkv3/plan/school",
                        "year" => $year,
                    ];
                    $localUrl = "https://api.zjzw.cn/web/api/?local_batch_id=" . $v['local_batch_id']
                        . "&local_province_id=" . $provinceid
                        . "&local_type_id=" . $typeid
                        . "&page=1&school_id=" . $schoolId
                        . "&size=20&special_group=" . $v['special_group_id']
                        . "&uri=apidata/api/gkv3/plan/school&year=" . $year
                        . "&signsafe=b264caa87b003f5903c78a51f09c7341";

                    $headers = [
                        'Content-Type' => 'application/json',
                        'authority' => 'api.zjzw.cn',
                    ];
                    $bodyDatas = Http::withHeaders($headers)->post($localUrl, $reqData)->json();

                    // Code 1069: dump the row and wait a minute before moving on.
                    if ($bodyDatas['code'] == 1069) {
                        dump($v, $bodyDatas);
                        sleep(60);
                    }
                    if ($bodyDatas['code'] == '0000') {
                        $schoolProvincescore->updateData(
                            ['id' => $v['id']],
                            ['special_group' => json_encode($bodyDatas['data'], JSON_UNESCAPED_UNICODE)]
                        );
                    }
                } catch (\Exception $e) {
                    dump($e->getMessage());
                    sleep(20);
                }
            }

            echo $v['id'] . ' ';
        }
    });
}
/**
 * Crawl historical per-province admission scores for undergraduate schools in an id range.
 *
 * For each matching school it fetches dic/provincescore.json, whose
 * data.newsdata.type map is keyed "{provinceId}_{year}" and lists type ids. For every
 * key not yet stored for the school, and every type id, the v1/school/province_score API
 * is queried (first page, 20 rows) and the rows are bulk-inserted into
 * SchoolProvincescore. special_group is stored as "[]"; handle1() later fills it for
 * rows that have an sg_name.
 *
 * Failed responses are dumped and skipped; an exception abandons the current school
 * after a 10-second pause.
 *
 * @return void
 */
public function handle3()
{
    $begin = (int)$this->argument('begin');
    $end = (int)$this->argument('end');
    dump($begin, $end);

    $school = new School();
    $schoolProvincescore = new SchoolProvincescore();
    $areaProvinceModel = new AreaProvinceModel();
    $pdata = $areaProvinceModel->selectDataWithField(['country' => 1], ['id', 'name']);
    // Map province id => province name.
    $provinceData = array_column($pdata, 'name', 'id');

    $school::select(['id','school_id'])->where([['id', '>=', $begin], ['id','<', $end], ['level_name', 'like', "本科%"]])->chunkById(30, function ($list) use ($provinceData, $schoolProvincescore) {
        $userList = $list->toArray();
        foreach ($userList as $userInfo) {
            // Resolved before the try block so the catch handler can always print it.
            $schoolId = $userInfo['school_id'];
            try {
                // Dictionary of available province/year/type combinations.
                $url = "https://static-data.gaokao.cn/www/2.0/school/" . $schoolId . "/dic/provincescore.json?a=www.gaokao.cn";
                $response = Http::withHeaders([
                    'Content-Type' => 'application/json',
                    'authority' => 'api.zjzw.cn',
                ])->get($url, []);
                $bodyData = $response->json();
                if ($bodyData['code'] != '0000') {
                    dump($schoolId, $bodyData);
                    continue;
                }
                if (!isset($bodyData['data']['newsdata'])) {
                    dump('newsdata error:' . $schoolId);
                    continue;
                }
                if (!isset($bodyData['data']['newsdata']['type'])) {
                    dump('type error:' . $schoolId);
                    continue;
                }

                $typeArr = $bodyData['data']['newsdata']['type'];
                foreach ($typeArr as $index => $item) {
                    sleep(3);
                    // Keys look like "62_2022": province id, then year.
                    $strArr = explode('_', $index);
                    $year = $strArr[1];
                    $provinceid = $strArr[0];

                    // Skip province/year pairs already stored for this school.
                    $ret = $schoolProvincescore->findDataWithField(['year'=>$year, 'school_id'=>$schoolId,'province_id'=>$provinceid],['id']);
                    if (!empty($ret)) {
                        continue;
                    }

                    foreach ($item as $typeid) {
                        $reqData = ["local_province_id" => $provinceid, "local_type_id" => $typeid, "page" => 1, "platform" => "2", "school_id" => $schoolId, "signsafe" => "29aef999992363fe732a38f251131980", "size" => 20, "uri" => "v1/school/province_score", "year" => $year];
                        $url = "https://api-gaokao.zjzw.cn/apidata/web?local_province_id=" . $provinceid . "&local_type_id=" . $typeid . "&page=1&platform=2&school_id=" . $schoolId . "&size=20&uri=v1/school/province_score&year=" . $year . "&signsafe=29aef999992363fe732a38f251131980";
                        $response = Http::withHeaders([
                            'Content-Type' => 'application/json',
                            'authority' => 'api.zjzw.cn',
                        ])->post($url, $reqData);
                        $bodyDatas = $response->json();
                        if ($bodyDatas['code'] != '0000') {
                            dump($schoolId, $typeid, $bodyDatas);
                            continue;
                        }

                        $provinceScoreData = $bodyDatas['data']['item'];
                        if (empty($provinceScoreData)) {
                            // $bodyDatas is an array: encode it instead of concatenating it, which
                            // raised an "Array to string conversion" error and aborted the school.
                            dump('暂无往年信息 $schoolId:' . $schoolId . ' $typeid: ' . $typeid . ' $bodyDatas: ' . json_encode($bodyDatas, JSON_UNESCAPED_UNICODE));
                            continue;
                        }

                        $schoolProvincescoreData = [];
                        foreach ($provinceScoreData as $i => $v) {
                            // Major-group details are not fetched here; an empty list is stored.
                            $zhuanye = [];
                            $schoolProvincescoreData[] = [
                                'school_id' => $schoolId,
                                'year' => $year,
                                'province_id' => $provinceid,
                                'province_name' => $provinceData[$provinceid] ?? "",
                                'local_type_id' => $typeid,
                                'local_type_name' => $v['local_type_name'],
                                'local_batch_id' => $v['local_batch_id'],
                                'local_batch_name' => $v['local_batch_name'],
                                'diff' => $v['diff'],
                                'num' => $v['num'],
                                'min' => $v['min'],
                                'min_section' => $v['min_section'],
                                'sg_info' => $v['sg_info'],
                                'sg_name' => $v['sg_name'],
                                'zslx_name' => $v['zslx_name'],
                                'special_group_id' => $v['special_group'],
                                'special_group' => json_encode($zhuanye, JSON_UNESCAPED_UNICODE),
                                'all_data' => json_encode($v, JSON_UNESCAPED_UNICODE),
                            ];
                        }
                        $schoolProvincescore->insertMultiData($schoolProvincescoreData);
                    }
                }
                echo $schoolId . '-' . $userInfo['id'] . ' ';
            } catch (\Exception $e) {
                echo $schoolId . '-' . $userInfo['id'] . ' ';
                dump($e->getMessage());
                sleep(10);
            }
        }
    });
}
/**
 * Import the school list, pages 21 through 148 of the gkv3/school/lists API.
 *
 * Each page (20 schools) is mapped to School rows and bulk-inserted, followed by a
 * 5-second pause and a dump of the page number. A response whose code is not "0000"
 * stops the command through dd().
 *
 * @return void
 */
public function handle222()
{
    $school = new School();
    $requestHeaders = [
        'Content-Type' => 'application/json',
        'authority' => 'api.zjzw.cn',
    ];

    foreach (range(21, 148) as $i) {
        $url = "https://api.zjzw.cn/web/api/?keyword=&page=".$i."&province_id=&ranktype=&request_type=1&size=20&type=&uri=apidata/api/gkv3/school/lists&signsafe=d72360cd2d5143988b6dcf5e909b644c";
        $bodyData = Http::withHeaders($requestHeaders)->post($url, [])->json();
        if ($bodyData['code'] != '0000') {
            dd($bodyData);
        }

        $rows = [];
        foreach ($bodyData['data']['item'] as $remote) {
            $rows[] = [
                'sid' => $remote['school_id'],
                'hightitle' => $remote['hightitle'],
                'name' => $remote['name'],
                'belong' => $remote['belong'],
                'province_id' => $remote['province_id'],
                'province_name' => $remote['province_name'],
                'city_id' => $remote['city_id'],
                'dual_class' => $remote['dual_class'],
                'dual_class_name' => $remote['dual_class_name'],
                // Column names differ from the API field names for these two flags.
                'feyy' => $remote['f211'],
                'fjbw' => $remote['f985'],
                'level_name' => $remote['level_name'],
                'nature_name' => $remote['nature_name'],
                'tag_name' => $remote['tag_name'],
                'type_name' => $remote['type_name'],
                'central' => $remote['central'],
                'recommend_master_level' => $remote['recommend_master_level'],
                // Raw API record kept alongside the extracted columns.
                'all_data' => json_encode($remote, JSON_UNESCAPED_UNICODE),
            ];
        }

        $school->insertMultiData($rows);
        sleep(5);
        dump($i);
    }
}
}
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use Illuminate\Support\Facades\Cache;
class test extends Command
{
protected $signature = 'crawl:dynamic-token';
protected $description = '使用动态Token管理爬取公考雷达';
private $client;
private $baseUrl = 'https://www.gongkaoleida.com';
private $cookieJar;
private $sessionData = [];
/**
* Entry point of the command. In its current state it only computes and dumps a
* signature candidate: md5 of the URL-encoded query string with a secret appended.
*
* NOTE(review): dd() terminates the process, so everything after the first dd() call is
* unreachable. That unreachable part also reads an `url` argument which the visible
* $signature ('crawl:dynamic-token') does not declare - confirm before re-enabling it.
* NOTE(review): a token and a secret are hard-coded below - confirm they may live in source.
*/
public function handle()
{
// Sample signed request URLs kept for reference (query string followed by &sign=...):
// https://api.gongkaoleida.com/api/FrontBackSecure/article/detailUserPc?appid=uqsFgLOVbuPrfn1v&articleId=2716421&from_device=h5&timestamp=1765185164&token=569fff2952c0317c29f3308df15cd75d11d15fmern1cdeol3f9tcfo31cdnb3o9o5yd03&userId=26239811&sign=a11b430c7a5610930bd603b7c6d05df5
// https://api.gongkaoleida.com/api/FrontBackSecure/article/detailUserPc?appid=uqsFgLOVbuPrfn1v&articleId=2455556&from_device=h5&timestamp=1765185842&token=c1c6a9fdf0bea1fe6a46314e3dd8c17111d156qkkr1cdejzbb1znjle1cdn6hw5gc9gxe&userId=11317419&sign=ebf4a4832ade58f2a45b772d273a838c
// https://api.gongkaoleida.com/api/FrontBackSecure/article/detailUserPc?appid=uqsFgLOVbuPrfn1v&articleId=2305006&from_device=h5&timestamp=1765251531&token=569fff2952c0317c29f3308df15cd75d11d15fmern1cdeol3f9tcfo31cdnb3o9o5yd03&userId=26239811&sign=31b7c0372a445598bbbe40aabb987acc
// Query string of the third sample above, without its sign parameter.
$s = 'appid=uqsFgLOVbuPrfn1v&articleId=2305006&from_device=h5&timestamp=1765251531&token=569fff2952c0317c29f3308df15cd75d11d15fmern1cdeol3f9tcfo31cdnb3o9o5yd03&userId=26239811';
$s .= '&secret=Hf6yn1JPb1QZxniWhIPv1IrHbWeLh2e8';
// Dump the candidate signature and its input, then stop.
dd(md5(urlencode($s)), $s);
// Unreachable from here on (see the note above).
dd(333);
$this->initializeClient();
// 1. Start the session
$this->startSession();
// 2. Crawl the target page
$url = $this->argument('url');
$result = $this->smartRequest($url);
dd($result);
if ($result['success']) {
$this->processResult($result);
}
return 0;
}
/**
 * Create a fresh cookie jar and the Guzzle client that shares it.
 *
 * The client targets $this->baseUrl, uses a 30-second timeout and sends
 * browser-like Accept headers with every request.
 */
private function initializeClient(): void
{
    $jar = new CookieJar();
    $defaultHeaders = [
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language' => 'zh-CN,zh;q=0.9',
        'Accept-Encoding' => 'gzip, deflate, br',
    ];

    $this->cookieJar = $jar;
    $this->client = new Client([
        'base_uri' => $this->baseUrl,
        'cookies' => $jar,
        'timeout' => 30,
        'headers' => $defaultHeaders,
    ]);
}
/**
 * Start a crawling session.
 *
 * Requests the home page, extracts the initial tokens from that response and then
 * calls the keep-alive step, logging a message before and after.
 */
private function startSession(): void
{
    $this->info('启动会话...');

    // Load the home page and harvest the initial tokens from it.
    $this->extractTokensFromResponse($this->client->get('/'));

    // Touch the API endpoint so the session becomes active.
    $this->activateSession();

    $this->info('会话启动完成');
}
/**
 * Collect session tokens from the cookie jar and from a response body.
 *
 * Stores the XSRF-TOKEN cookie (raw and URL-decoded) and the gkld_session cookie in
 * $this->sessionData, plus the csrf-token meta tag value when the body contains one,
 * then logs the token state.
 *
 * @param mixed $response Response whose body is read from its current stream position.
 */
private function extractTokensFromResponse($response): void
{
    // Cookies first.
    foreach ($this->cookieJar->toArray() as $entry) {
        if (!isset($entry['Name'])) {
            continue;
        }

        $cookieName = $entry['Name'];
        $cookieValue = $entry['Value'] ?? '';

        if ($cookieName === 'XSRF-TOKEN') {
            $this->sessionData['xsrf_token'] = $cookieValue;
            $this->sessionData['xsrf_decoded'] = urldecode($cookieValue);
            continue;
        }
        if ($cookieName === 'gkld_session') {
            $this->sessionData['session'] = $cookieValue;
        }
    }

    // Then the CSRF token embedded in the HTML, if any.
    $html = $response->getBody()->getContents();
    $found = preg_match('/<meta name="csrf-token" content="([^"]+)"/', $html, $hit);
    if ($found) {
        $this->sessionData['csrf_token'] = $hit[1];
    }

    // Record the current token state.
    $this->logTokenInfo();
}
/**
 * Send an AJAX-style request to the keep-alive endpoint to simulate activity.
 *
 * Best effort: any exception is deliberately ignored because the endpoint
 * may not exist on the target site.
 */
private function activateSession(): void
{
    $ajaxHeaders = [
        'X-Requested-With' => 'XMLHttpRequest',
        'X-CSRF-TOKEN' => $this->sessionData['csrf_token'] ?? '',
        'X-XSRF-TOKEN' => $this->sessionData['xsrf_decoded'] ?? '',
    ];

    try {
        $keepAlive = $this->client->get('/api/session/keepalive', [
            'headers' => $ajaxHeaders,
        ]);

        // Pick up any tokens refreshed by this call.
        $this->extractTokensFromResponse($keepAlive);
    } catch (\Exception $e) {
        // Intentionally ignored: not every site exposes this endpoint.
    }
}
/**
 * GET a URL with retries, refreshing tokens when a request fails on them.
 *
 * Each attempt validates the token age, performs the request with the
 * current token headers and, on success, returns the body. On failure the
 * attempt is retried with exponential backoff (2, 4, ... seconds).
 *
 * @param string $url        Absolute URL or path relative to the client's base URI.
 * @param int    $maxRetries Maximum number of attempts.
 *
 * @return array On success: ['success' => true, 'status' => int, 'content' => string].
 *               On failure: ['success' => false, 'error' => string].
 */
private function smartRequest(string $url, int $maxRetries = 3): array
{
    for ($attempt = 1; $attempt <= $maxRetries; $attempt++) {
        try {
            // Refresh the token first if it is too old.
            $this->ensureTokenValid();

            $response = $this->client->get($url, [
                'headers' => $this->getRequestHeaders($url),
            ]);

            // Read the body exactly once, before token extraction, and rewind
            // so extractTokensFromResponse() can still scan the HTML. The
            // previous code read the stream after it had been consumed, and a
            // leftover dd() call stopped the process before any result was
            // returned.
            $stream = $response->getBody();
            $content = (string) $stream;
            if ($stream->isSeekable()) {
                $stream->rewind();
            }

            // Pick up any tokens rotated by this response.
            $this->extractTokensFromResponse($response);

            return [
                'success' => true,
                'status' => $response->getStatusCode(),
                'content' => $content,
            ];
        } catch (\Exception $e) {
            // Token-related failures get a fresh token before the next attempt.
            if ($this->isTokenError($e)) {
                $this->refreshToken();
            }

            if ($attempt >= $maxRetries) {
                return [
                    'success' => false,
                    'error' => $e->getMessage(),
                ];
            }

            sleep(2 ** $attempt); // exponential backoff
        }
    }

    // Only reached when $maxRetries is less than 1.
    return ['success' => false, 'error' => 'Max retries exceeded'];
}
/**
 * Refresh the token when the cached last-update time is older than the
 * configured interval.
 *
 * NOTE(review): the `interval` option is not part of the command signature
 * visible at the top of this file — confirm it is declared, otherwise
 * option() will throw.
 */
private function ensureTokenValid(): void
{
    // The Cache facade is not imported in this file, so it is referenced by
    // its fully-qualified name; a bare `Cache` would resolve inside the
    // App\Console\Commands namespace and fail with "class not found".
    $lastUpdate = (int) \Illuminate\Support\Facades\Cache::get('token_last_update', 0);

    // Console options arrive as strings; compare as integers (seconds).
    $interval = (int) $this->option('interval');

    if (time() - $lastUpdate >= $interval) {
        $this->refreshToken();
    }
}
/**
 * Re-request the site root to obtain fresh tokens and record the refresh time.
 *
 * The refresh timestamp is stored under the `token_last_update` cache key.
 * Failures are reported on the console and not rethrown.
 */
private function refreshToken(): void
{
    $this->info('刷新token...');

    try {
        // The root page is used as a lightweight page to fetch.
        $response = $this->client->get('/');
        $this->extractTokensFromResponse($response);

        // Fully-qualified because the Cache facade is not imported in this
        // file; a bare `Cache` would raise an Error that the catch below
        // (\Exception only) would not intercept.
        \Illuminate\Support\Facades\Cache::put('token_last_update', time(), 300);

        $this->info('Token刷新完成');
    } catch (\Exception $e) {
        $this->error('刷新token失败: ' . $e->getMessage());
    }
}
/**
 * Build the headers for a page request.
 *
 * Starts from a fixed browser-like header set and appends the CSRF/XSRF
 * token headers when the corresponding session values are non-empty.
 *
 * @param string $url Target URL (not used when building the headers).
 *
 * @return array Header name => value map.
 */
private function getRequestHeaders(string $url): array
{
    $headers = [
        'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language' => 'zh-CN,zh;q=0.9',
        'Accept-Encoding' => 'gzip, deflate, br',
        'Referer' => $this->baseUrl . '/',
    ];

    // Header name => key in $this->sessionData holding its value.
    $tokenHeaders = [
        'X-CSRF-TOKEN' => 'csrf_token',
        'X-XSRF-TOKEN' => 'xsrf_decoded',
    ];

    foreach ($tokenHeaders as $headerName => $sessionKey) {
        if (!empty($this->sessionData[$sessionKey])) {
            $headers[$headerName] = $this->sessionData[$sessionKey];
        }
    }

    return $headers;
}
/**
 * Decide whether an exception looks token-related.
 *
 * @param \Exception $e Exception raised by a request.
 *
 * @return bool True when the message contains "419", "CSRF" or "Token".
 */
private function isTokenError(\Exception $e): bool
{
    $text = $e->getMessage();

    foreach (['419', 'CSRF', 'Token'] as $marker) {
        if (strpos($text, $marker) !== false) {
            return true;
        }
    }

    return false;
}
/**
 * Print the tokens currently held in $this->sessionData to the console.
 *
 * Only the first 20 characters of each token are shown, followed by "...".
 */
private function logTokenInfo(): void
{
    // Display label => key in $this->sessionData.
    $fields = [
        'CSRF Token' => 'csrf_token',
        'XSRF Token' => 'xsrf_token',
        'Session' => 'session',
    ];

    $summary = [];
    foreach ($fields as $label => $sessionKey) {
        if (isset($this->sessionData[$sessionKey])) {
            $summary[$label] = substr($this->sessionData[$sessionKey], 0, 20) . '...';
        }
    }

    $this->info('当前Token状态:');
    foreach ($summary as $key => $value) {
        $this->line(" {$key}: {$value}");
    }
}
/**
 * Report a successful crawl on the console and optionally persist the body.
 *
 * @param array $result Result array carrying the 'status' and 'content' keys.
 */
private function processResult(array $result): void
{
    $content = $result['content'];
    $byteCount = strlen($content);

    $this->info('爬取成功!');
    $this->info("状态码: {$result['status']}");
    $this->info("内容长度: " . $byteCount . " 字节");

    // Nothing more to do unless the `save` option was given.
    if (!$this->option('save')) {
        return;
    }

    $this->saveResult($content);
}
/**
 * Write crawled content to a timestamped HTML file under storage/app/crawled.
 *
 * The target directory is created when missing, and a failed write is
 * reported on the console instead of being announced as a success.
 *
 * @param string $content Raw content to store.
 */
private function saveResult(string $content): void
{
    $filename = storage_path('app/crawled/' . date('Ymd_His') . '.html');
    $directory = dirname($filename);

    // The second is_dir() check covers another process creating the
    // directory between the first check and mkdir().
    if (!is_dir($directory) && !mkdir($directory, 0755, true) && !is_dir($directory)) {
        $this->error("无法创建目录: {$directory}");
        return;
    }

    if (file_put_contents($filename, $content) === false) {
        $this->error("保存失败: {$filename}");
        return;
    }

    $this->info("结果已保存到: {$filename}");
}
}
...@@ -5,14 +5,17 @@ namespace App\Http\Services; ...@@ -5,14 +5,17 @@ namespace App\Http\Services;
use App\Http\Controllers\Controller; use App\Http\Controllers\Controller;
use App\Models\Config; use App\Models\Config;
use Ramsey\Uuid\Uuid; use Ramsey\Uuid\Uuid;
use Godruoyi\Snowflake\Snowflake;; use Godruoyi\Snowflake\Snowflake;
use App\Models\LeidaModel;
class BaseService extends Controller class BaseService extends Controller
{ {
/** /**
* 获取格式化的时间 * 获取格式化的时间
* @param integer $time [需要转换的unix时间戳] * @param integer $time [需要转换的unix时间戳]
* @param string $format [转换成的格式] * @param string $format [转换成的格式]
* @return [type] [string] * @return [type] [string]
*/ */
public function getDataFormat($time = 0, $format = 'Y-m-d H:i:s') public function getDataFormat($time = 0, $format = 'Y-m-d H:i:s')
...@@ -26,7 +29,7 @@ class BaseService extends Controller ...@@ -26,7 +29,7 @@ class BaseService extends Controller
/** /**
* 获取配置项 * 获取配置项
* @param [type] $configKey [key] * @param [type] $configKey [key]
* @param integer $type [type] * @param integer $type [type]
* @return [type] [description] * @return [type] [description]
*/ */
public function getConfigValue($configKey, $type = 1) public function getConfigValue($configKey, $type = 1)
...@@ -44,7 +47,8 @@ class BaseService extends Controller ...@@ -44,7 +47,8 @@ class BaseService extends Controller
return $configInfo['config_value'] ?? ''; return $configInfo['config_value'] ?? '';
} }
protected function buildTree($list, $pid = 0) { protected function buildTree($list, $pid = 0)
{
$tree = []; $tree = [];
foreach ($list as $data) { foreach ($list as $data) {
...@@ -57,15 +61,165 @@ class BaseService extends Controller ...@@ -57,15 +61,165 @@ class BaseService extends Controller
return $tree; return $tree;
} }
protected function getUUid() { protected function getUUid()
{
return Uuid::uuid4()->toString(); return Uuid::uuid4()->toString();
} }
protected function getSnowflake() { protected function getSnowflake()
{
$snowflake = new Snowflake(1, 1); $snowflake = new Snowflake(1, 1);
$snowflake->setStartTimeStamp(strtotime('2024-01-01')*1000); $snowflake->setStartTimeStamp(strtotime('2024-01-01') * 1000);
return $snowflake->id(); return $snowflake->id();
} }
/**
 * Parse one announcement list page, store its announcements and report paging.
 *
 * Each <li> of the main announcement list yields its notice labels, title,
 * link and date. Items dated on or before 2024-11-01 are skipped and set the
 * returned 'continue' flag; the rest are inserted through LeidaModel.
 *
 * @param string $requestUrl URL the HTML was fetched from (stored in the 'param' column).
 * @param string $html       Raw HTML of the list page.
 * @param string $areaName   Province name stored with every row.
 *
 * @return array ['allData' => array, 'total_pages' => int, 'continue' => bool];
 *               'continue' is true when at least one item was skipped by the date cutoff.
 */
public static function htmlExplain($requestUrl, $html, $areaName)
{
    $emptyResult = ['allData' => [], 'total_pages' => 0, 'continue' => false];

    // DOMDocument::loadHTML() throws a ValueError on an empty string (PHP 8).
    if (trim((string) $html) === '') {
        return $emptyResult;
    }

    $dom = new \DOMDocument();
    @$dom->loadHTML($html, LIBXML_NOERROR | LIBXML_NOWARNING);
    $xpath = new \DOMXPath($dom);

    // Announcement items in the main content column.
    $mainContent = $xpath->query("//div[contains(@class, 'mdn-content-l')]//ul[@class='link-list']/li");
    if ($mainContent === false || $mainContent->length === 0) {
        return $emptyResult;
    }

    // Total page count, taken from the "last page" link.
    $total_pages = 0;
    $lastPageNode = $xpath->query("//a[contains(text(), '尾页')]");
    if ($lastPageNode->length > 0) {
        $href = $lastPageNode->item(0)->getAttribute('href');
        if (preg_match('/page=(\d+)/', $href, $matches)) {
            $total_pages = (int) $matches[1];
        }
    }

    $cutoff = strtotime('2024-11-01');
    $allData = [];
    $insertData = [];
    $dateFlag = false;

    foreach ($mainContent as $node) {
        // Notice labels with the surrounding square brackets removed; the
        // first two are stored as the 'area' and 'type' columns.
        $labels = [];
        foreach ($xpath->query(".//i[@class='notice-label']", $node) as $labelNode) {
            $labels[] = str_replace(['[', ']'], '', trim($labelNode->nodeValue));
        }

        // An item without a link has no title/url/date to store; skip it
        // rather than reading undefined keys further down.
        $linkNode = $xpath->query(".//a", $node)->item(0);
        if (!$linkNode) {
            continue;
        }

        $labels['title'] = trim($linkNode->nodeValue);
        $labels['url'] = $linkNode->getAttribute('href');

        $timeNode = $xpath->query(".//time", $node)->item(0);
        $labels['date'] = $timeNode ? trim($timeNode->nodeValue) : '';

        // Skip items dated on or before the cutoff; unparseable dates are kept.
        $published = strtotime($labels['date']);
        if ($published && $published <= $cutoff) {
            $dateFlag = true;
            continue;
        }

        $allData[] = $labels;
        $insertData[] = [
            'param' => $requestUrl,
            'province' => $areaName,
            // Fewer than two labels would otherwise raise "undefined array key".
            'area' => $labels[0] ?? '',
            'type' => $labels[1] ?? '',
            'title' => $labels['title'],
            'url' => $labels['url'],
            'stime' => $labels['date'],
            'content' => json_encode($labels),
        ];
    }

    $LeidaModel = new LeidaModel();
    $LeidaModel->insertMultiData($insertData);

    return ['allData' => $allData, 'total_pages' => $total_pages, 'continue' => $dateFlag];
}
} }
<?php
namespace App\Http\Services;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\HandlerStack;
use GuzzleHttp\Middleware;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\Log;
/**
 * Cookie-aware HTTP crawler for www.gongkaoleida.com.
 *
 * Keeps one Guzzle client and cookie jar per instance, re-requests the home
 * page to refresh cookies when they are older than the refresh interval, and
 * offers helpers to parse and persist crawled pages.
 */
class GgongkaoLeidaCrawler
{
    /** Guzzle HTTP client, created in initializeClient(). */
    private $client;

    /** Site root used as base URI and for resolving relative links. */
    private $baseUrl = 'https://www.gongkaoleida.com';

    /** Cookie jar shared with the client. */
    private $cookieJar;

    /** Whether a session has been initialized and not invalidated since. */
    private $sessionActive = false;

    /** Unix timestamp of the last cookie update, or null when never updated. */
    private $lastCookieUpdate = null;

    /** Seconds after which cookies are refreshed before the next crawl. */
    private $cookieRefreshInterval = 25;

    /** Seconds slept before every crawl request. */
    private $requestDelay = 2;

    public function __construct()
    {
        $this->initializeClient();
    }

    /**
     * Create the cookie jar and the Guzzle client that uses it.
     */
    private function initializeClient(): void
    {
        $this->cookieJar = new CookieJar();

        // NOTE(review): HandlerStack::create() appears to register Guzzle's
        // cookie middleware already, which would make this push redundant —
        // confirm before removing.
        $stack = HandlerStack::create();
        $stack->push(Middleware::cookies($this->cookieJar));

        $this->client = new Client([
            'base_uri' => $this->baseUrl,
            'handler' => $stack,
            'cookies' => $this->cookieJar,
            'timeout' => 30,
            'connect_timeout' => 10,
        ]);
    }

    /**
     * Initialize the session by requesting the home page for initial cookies.
     *
     * @return bool True on success, false when the request failed (the error is logged).
     */
    public function initializeSession(): bool
    {
        try {
            Log::info('初始化公考雷达爬虫会话...');

            $response = $this->client->get('/', [
                'headers' => $this->getDefaultHeaders(),
            ]);
            $this->updateCookiesFromResponse($response);

            $this->sessionActive = true;
            $this->lastCookieUpdate = time();

            Log::info('会话初始化完成', [
                'cookie_count' => count($this->cookieJar->toArray())
            ]);

            return true;
        } catch (\Exception $e) {
            Log::error('会话初始化失败: ' . $e->getMessage());

            return false;
        }
    }

    /**
     * Initialize the session from a raw "name=value; name2=value2" cookie string.
     *
     * @param string $cookieString Cookie header value, e.g. copied from a browser.
     *
     * @return bool True on success, false when an exception occurred (the error is logged).
     */
    public function initializeWithCookieString(string $cookieString): bool
    {
        try {
            $cookies = $this->parseCookieString($cookieString);

            foreach ($cookies as $name => $value) {
                $this->cookieJar->setCookie(new \GuzzleHttp\Cookie\SetCookie([
                    'Name' => $name,
                    'Value' => $value,
                    'Domain' => 'www.gongkaoleida.com',
                    'Path' => '/',
                ]));
            }

            $this->sessionActive = true;
            $this->lastCookieUpdate = time();

            Log::info('使用cookie字符串初始化成功', [
                'cookie_count' => count($cookies),
                'keys' => array_keys($cookies)
            ]);

            return true;
        } catch (\Exception $e) {
            Log::error('使用cookie字符串初始化失败: ' . $e->getMessage());

            return false;
        }
    }

    /**
     * Crawl a single page.
     *
     * @param string $url Absolute URL, or a path appended to the base URL.
     *
     * @return array On success: ['success' => true, 'status' => int, 'content' => string, 'cookies' => array].
     *               On failure: ['success' => false, 'error' => string, 'cookies' => array].
     */
    public function crawl(string $url): array
    {
        try {
            // Session setup and the cookie refresh run inside the try block
            // so that a failing refresh is reported through the failure
            // array instead of escaping as an exception.
            if (!$this->sessionActive) {
                $this->initializeSession();
            }

            $this->ensureFreshCookie();

            // Delay before every request.
            sleep($this->requestDelay);

            $fullUrl = strpos($url, 'http') === 0 ? $url : $this->baseUrl . $url;
            Log::info('开始爬取: ' . $fullUrl);

            $response = $this->client->get($fullUrl, [
                'headers' => $this->getRequestHeaders($fullUrl),
                'allow_redirects' => [
                    'max' => 5,
                    'strict' => true,
                    'referer' => true,
                ],
            ]);

            $this->updateCookiesFromResponse($response);

            $statusCode = $response->getStatusCode();
            $body = $response->getBody()->getContents();

            if ($statusCode !== 200 || !$this->isValidResponse($body)) {
                throw new \Exception("无效响应,状态码: {$statusCode}");
            }

            Log::info('爬取成功', [
                'url' => $fullUrl,
                'status' => $statusCode,
                'length' => strlen($body)
            ]);

            return [
                'success' => true,
                'status' => $statusCode,
                'content' => $body,
                'cookies' => $this->getCookieSummary(),
            ];
        } catch (\Exception $e) {
            Log::error('爬取失败: ' . $e->getMessage());

            // Force a fresh session on the next crawl after an auth-like failure.
            if ($this->isSessionError($e)) {
                $this->sessionActive = false;
            }

            return [
                'success' => false,
                'error' => $e->getMessage(),
                'cookies' => $this->getCookieSummary(),
            ];
        }
    }

    /**
     * Crawl several URLs in sequence with a randomized pause between them.
     *
     * @param array $urls         URLs to crawl.
     * @param int   $delayBetween Base pause in seconds; 1-3 extra seconds are added at random.
     *
     * @return array Map of URL => crawl() result.
     */
    public function batchCrawl(array $urls, int $delayBetween = 5): array
    {
        $results = [];

        foreach ($urls as $index => $url) {
            Log::info("批量爬取进度: " . ($index + 1) . "/" . count($urls));

            $results[$url] = $this->crawl($url);

            // No pause after the last URL.
            if ($index < count($urls) - 1) {
                sleep($delayBetween + mt_rand(1, 3));
            }
        }

        return $results;
    }

    /**
     * Parse HTML and extract job entries.
     *
     * The selectors are generic guesses and may need adjusting to the site's
     * real markup.
     *
     * @param string $html UTF-8 HTML document.
     *
     * @return array List of jobs, each with the keys title, company, location,
     *               salary, publish_time and details_url (empty string when not found).
     */
    public function extractJobsFromHtml(string $html): array
    {
        // DOMDocument::loadHTML() throws a ValueError on an empty string (PHP 8).
        if (trim($html) === '') {
            return [];
        }

        // Encode non-ASCII characters as numeric entities so libxml keeps the
        // UTF-8 text intact; mb_convert_encoding(..., 'HTML-ENTITIES') is
        // deprecated since PHP 8.2.
        $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

        $dom = new \DOMDocument();
        @$dom->loadHTML($encoded);
        $xpath = new \DOMXPath($dom);

        $jobElements = $xpath->query('//div[contains(@class, "job-item") or contains(@class, "position-item")]');
        if ($jobElements->length === 0) {
            // Fall back to list items.
            $jobElements = $xpath->query('//li[contains(@class, "job") or contains(@class, "position")]');
        }

        // Job field => XPath (relative to the job element) of its text node.
        $textFields = [
            'title' => './/h3 | .//h4 | .//a[contains(@class, "title")]',
            'company' => './/div[contains(@class, "company")] | .//span[contains(@class, "company")]',
            'location' => './/span[contains(@class, "location") or contains(@class, "city")]',
            'publish_time' => './/span[contains(@class, "time") or contains(@class, "date")]',
        ];

        $jobs = [];
        foreach ($jobElements as $jobElement) {
            $job = [
                'title' => '',
                'company' => '',
                'location' => '',
                'salary' => '',
                'publish_time' => '',
                'details_url' => '',
            ];

            foreach ($textFields as $field => $expression) {
                $nodes = $xpath->query($expression, $jobElement);
                if ($nodes->length > 0) {
                    $job[$field] = trim($nodes->item(0)->textContent);
                }
            }

            // Detail link, made absolute when relative.
            $linkNodes = $xpath->query('.//a[contains(@href, "/job/") or contains(@href, "/position/")]', $jobElement);
            if ($linkNodes->length > 0) {
                $href = $linkNodes->item(0)->getAttribute('href');
                $job['details_url'] = strpos($href, 'http') === 0 ? $href : $this->baseUrl . $href;
            }

            $jobs[] = $job;
        }

        return $jobs;
    }

    /**
     * Save HTML to storage/app/crawled under a timestamped, URL-hashed name.
     *
     * @param string $html HTML content.
     * @param string $url  Source URL (its md5 becomes part of the file name).
     *
     * @return string Path of the written file.
     */
    public function saveHtmlToFile(string $html, string $url): string
    {
        $filename = 'gongkaoleida_' . date('Ymd_His') . '_' . md5($url) . '.html';
        $path = storage_path('app/crawled/' . $filename);

        // Make sure the target directory exists.
        if (!is_dir(dirname($path))) {
            mkdir(dirname($path), 0755, true);
        }

        file_put_contents($path, $html);

        return $path;
    }

    /**
     * Save data as pretty-printed JSON under storage/app/crawled.
     *
     * @param array  $data     Data to encode.
     * @param string $filename File name relative to storage/app/crawled.
     *
     * @return string Path of the written file.
     */
    public function saveDataToJson(array $data, string $filename): string
    {
        $path = storage_path('app/crawled/' . $filename);

        // Make sure the target directory exists.
        if (!is_dir(dirname($path))) {
            mkdir(dirname($path), 0755, true);
        }

        file_put_contents($path, json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE));

        return $path;
    }

    /**
     * Describe the current session.
     *
     * @return array ['active' => bool, 'last_cookie_update' => string|null, 'cookie_count' => int].
     */
    public function getSessionInfo(): array
    {
        return [
            'active' => $this->sessionActive,
            'last_cookie_update' => $this->lastCookieUpdate ? date('Y-m-d H:i:s', $this->lastCookieUpdate) : null,
            'cookie_count' => count($this->cookieJar->toArray()),
        ];
    }

    /**
     * Summarize the cookie jar, truncating values longer than 30 characters.
     *
     * @return array Map of cookie name => (possibly truncated) value.
     */
    public function getCookieSummary(): array
    {
        $summary = [];

        foreach ($this->cookieJar->toArray() as $cookie) {
            $name = $cookie['Name'] ?? 'unknown';
            $value = $cookie['Value'] ?? '';
            $summary[$name] = strlen($value) > 30 ? substr($value, 0, 30) . '...' : $value;
        }

        return $summary;
    }

    /**
     * Refresh cookies when they were never fetched or are older than the refresh interval.
     */
    private function ensureFreshCookie(): void
    {
        $isStale = $this->lastCookieUpdate === null
            || (time() - $this->lastCookieUpdate) >= $this->cookieRefreshInterval;

        if ($isStale) {
            Log::info('刷新Cookie...');
            $this->refreshCookie();
        }
    }

    /**
     * Re-request the home page to refresh cookies.
     *
     * @throws \Exception Rethrows any request failure after logging it.
     */
    private function refreshCookie(): void
    {
        try {
            $response = $this->client->get('/', [
                'headers' => $this->getDefaultHeaders(),
            ]);
            $this->updateCookiesFromResponse($response);
            $this->lastCookieUpdate = time();
        } catch (\Exception $e) {
            Log::error('刷新Cookie失败: ' . $e->getMessage());
            throw $e;
        }
    }

    /**
     * Copy every Set-Cookie header of a response into the cookie jar.
     *
     * @param mixed $response Response object exposing getHeader().
     */
    private function updateCookiesFromResponse($response): void
    {
        foreach ($response->getHeader('Set-Cookie') as $header) {
            $cookie = $this->parseSetCookieHeader($header);
            if ($cookie) {
                $this->cookieJar->setCookie($cookie);
            }
        }
    }

    /**
     * Parse one Set-Cookie header value.
     *
     * Domain and Path default to www.gongkaoleida.com and "/" unless the
     * header overrides them.
     *
     * @param string $header Raw Set-Cookie header value.
     *
     * @return \GuzzleHttp\Cookie\SetCookie|null Null when the header has no name=value pair.
     */
    private function parseSetCookieHeader(string $header): ?\GuzzleHttp\Cookie\SetCookie
    {
        $parts = explode(';', $header);

        $nameValue = explode('=', trim($parts[0]), 2);
        if (count($nameValue) !== 2) {
            return null;
        }

        $cookieData = [
            'Name' => trim($nameValue[0]),
            'Value' => trim($nameValue[1]),
            'Domain' => 'www.gongkaoleida.com',
            'Path' => '/',
        ];

        foreach (array_slice($parts, 1) as $rawPart) {
            $part = trim($rawPart);

            // Flag attributes carry no value.
            if (strpos($part, '=') === false) {
                $flag = strtolower($part);
                if ($flag === 'secure') {
                    $cookieData['Secure'] = true;
                } elseif ($flag === 'httponly') {
                    $cookieData['HttpOnly'] = true;
                }
                continue;
            }

            [$attrName, $attrValue] = explode('=', $part, 2);
            // Trim so values written as "Path = /" do not keep stray spaces.
            $attrValue = trim($attrValue);

            switch (strtolower(trim($attrName))) {
                case 'domain':
                    $cookieData['Domain'] = $attrValue;
                    break;
                case 'path':
                    $cookieData['Path'] = $attrValue;
                    break;
                case 'expires':
                    // Ignore unparseable dates instead of storing false.
                    $expires = strtotime($attrValue);
                    if ($expires !== false) {
                        $cookieData['Expires'] = $expires;
                    }
                    break;
                case 'max-age':
                    $cookieData['Max-Age'] = (int) $attrValue;
                    break;
            }
        }

        return new \GuzzleHttp\Cookie\SetCookie($cookieData);
    }

    /**
     * Parse a "name=value; name2=value2" cookie string.
     *
     * @param string $cookieString Raw cookie string.
     *
     * @return array Map of cookie name => value; pairs without "=" are dropped.
     */
    private function parseCookieString(string $cookieString): array
    {
        $cookies = [];

        foreach (explode(';', $cookieString) as $pair) {
            $pair = trim($pair);
            if (empty($pair)) {
                continue;
            }

            $parts = explode('=', $pair, 2);
            if (count($parts) === 2) {
                $cookies[trim($parts[0])] = trim($parts[1]);
            }
        }

        return $cookies;
    }

    /**
     * Default browser-like request headers with a randomly chosen User-Agent.
     *
     * @return array Header name => value map.
     */
    private function getDefaultHeaders(): array
    {
        return [
            'User-Agent' => $this->getRandomUserAgent(),
            'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language' => 'zh-CN,zh;q=0.9',
            'Accept-Encoding' => 'gzip, deflate, br',
        ];
    }

    /**
     * Request headers for a URL; adds a Referer for URLs under the base URL.
     *
     * @param string $url Absolute request URL.
     *
     * @return array Header name => value map.
     */
    private function getRequestHeaders(string $url): array
    {
        $headers = $this->getDefaultHeaders();

        if (strpos($url, $this->baseUrl) === 0) {
            $headers['Referer'] = $this->baseUrl . '/';
        }

        return $headers;
    }

    /**
     * Pick one of the built-in User-Agent strings at random.
     */
    private function getRandomUserAgent(): string
    {
        $userAgents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        ];

        return $userAgents[array_rand($userAgents)];
    }

    /**
     * Heuristic check that a body is a real page and not a block page.
     *
     * @param string $body Response body.
     *
     * @return bool False when a block marker is present; otherwise true when the
     *              body contains a known site keyword or is longer than 2000 bytes.
     */
    private function isValidResponse(string $body): bool
    {
        foreach (['403 Forbidden', 'Access Denied', '机器人检测'] as $blockMarker) {
            if (strpos($body, $blockMarker) !== false) {
                return false;
            }
        }

        return strpos($body, '公务员') !== false
            || strpos($body, '公考雷达') !== false
            || strlen($body) > 2000;
    }

    /**
     * Decide whether an exception indicates a session problem.
     *
     * @param \Exception $e Exception raised during a crawl.
     *
     * @return bool True when the message contains "419", "403" or "401".
     */
    private function isSessionError(\Exception $e): bool
    {
        $message = $e->getMessage();

        return strpos($message, '419') !== false
            || strpos($message, '403') !== false
            || strpos($message, '401') !== false;
    }
}
\ No newline at end of file
<?php
/**
* Created by PhpStorm.
* User: bmb369
* Date: 2024-05-07
* Time: 14:20
*/
namespace App\Models;
use Illuminate\Database\Eloquent\SoftDeletes;
class LeidaModel extends Base {
    use SoftDeletes;

    // Table backing this model.
    protected $table = 'gz_leida';

    // Column used by the SoftDeletes trait, cast as a date.
    protected $dates = ['deleted_at'];

    protected $primaryKey = 'id';

    /**
     * Fetch one row by primary key.
     *
     * @param mixed $id Value matched against the table's id column.
     *
     * @return array The row as an array, or an empty array when nothing matches.
     */
    public function getInfo($id)
    {
        $idColumn = $this->table . '.id';
        $allColumns = $this->table . '.' . '*';

        $row = $this->select($allColumns)
            ->where([$idColumn => $id])
            ->first();

        return empty($row) ? [] : $row->toArray();
    }
}
\ No newline at end of file
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-3-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-3-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-3-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-59-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-3-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-3-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-3-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-3-124?page=1
未爬取到数据:
未爬取到数据:
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-3-124?page=46
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-59-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-60-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-7-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-63-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-62-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-8-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-9-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-66-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-67-124?page=1
未爬取到数据:https://www.gongkaoleida.com/area/3510-0-0-78-124?page=1
This source diff could not be displayed because it is too large. You can view the blob instead.
<?php <?php
$html = file_get_contents( './list.html'); //$html = file_get_contents( './list.html');
$token = "eyJpdiI6IlwvZDFiWk9rR3pDcU5hOWdHMWxwbXZ3PT0iLCJ2YWx1ZSI6IjRwYkUralJEbHBhZEg2S2tnOUV5citXVFd4TWY0TkxpVXllSE1BS1JMR0sxd3lcL0dYeVRKSkxvcitJb1ZFZFk5IiwibWFjIjoiMDEyODIyMTFhZDU3YjI1ODM4MmFhMmYxZGE4YjVhOGRjZjEzZmNlMzViMmExMDI3YjljMWNmOTYxMzhlMTM1YyJ9";
// Base64 解码
$decoded = base64_decode($token);
var_dump($decoded);die;
$dom = new DOMDocument(); $dom = new DOMDocument();
@$dom->loadHTML($html, LIBXML_NOERROR | LIBXML_NOWARNING); @$dom->loadHTML($html, LIBXML_NOERROR | LIBXML_NOWARNING);
$xpath = new DOMXPath($dom); $xpath = new DOMXPath($dom);
...@@ -17,6 +26,19 @@ $noticeNodes = $xpath->query("//div[@class='mdn-content-l']//ul[@class='link-lis ...@@ -17,6 +26,19 @@ $noticeNodes = $xpath->query("//div[@class='mdn-content-l']//ul[@class='link-lis
// 最佳方法:查找主内容区的公告列表 // 最佳方法:查找主内容区的公告列表
$mainContent = $xpath->query("//div[contains(@class, 'mdn-content-l')]//ul[@class='link-list']/li"); $mainContent = $xpath->query("//div[contains(@class, 'mdn-content-l')]//ul[@class='link-list']/li");
echo "主内容区公告数量: " . $mainContent->length . "\n"; echo "主内容区公告数量: " . $mainContent->length . "\n";
// 提取总页数(从尾页链接)
$lastPageNode = $xpath->query("//a[contains(text(), '尾页')]");
if ($lastPageNode->length > 0) {
$href = $lastPageNode->item(0)->getAttribute('href');
if (preg_match('/page=(\d+)/', $href, $matches)) {
$total_pages = (int)$matches[1];
}
}
var_dump($total_pages);
$allData = []; $allData = [];
if ($mainContent->length > 0) { if ($mainContent->length > 0) {
foreach ($mainContent as $index => $node) { foreach ($mainContent as $index => $node) {
...@@ -44,6 +66,16 @@ if ($mainContent->length > 0) { ...@@ -44,6 +66,16 @@ if ($mainContent->length > 0) {
$date = $timeNode ? trim($timeNode->nodeValue) : ''; $date = $timeNode ? trim($timeNode->nodeValue) : '';
$labels['date'] = $date; $labels['date'] = $date;
//时间超过2024年
var_dump($labels['date']);
var_dump(strtotime($labels['date']));
var_dump(strtotime('2024-11-01'));
if(strtotime($labels['date']) && strtotime($labels['date']) <= strtotime('2024-11-01')){
$dateFlag = 1;
var_dump(333);
}
// // 格式化输出 // // 格式化输出
// if (count($labels) >= 2) { // if (count($labels) >= 2) {
// // 通常第一个是地区,第二个是考试类型 // // 通常第一个是地区,第二个是考试类型
...@@ -60,7 +92,7 @@ if ($mainContent->length > 0) { ...@@ -60,7 +92,7 @@ if ($mainContent->length > 0) {
} }
} else { } else {
echo "未找到公告列表!尝试备用方法...\n"; dd( "未找到公告列表!尝试备用方法...");
// 备用方法:查找所有可能包含公告的li // 备用方法:查找所有可能包含公告的li
$allLi = $xpath->query("//li"); $allLi = $xpath->query("//li");
...@@ -94,7 +126,7 @@ if ($mainContent->length > 0) { ...@@ -94,7 +126,7 @@ if ($mainContent->length > 0) {
echo "通过备用方法找到公告数量: $noticeCount\n"; echo "通过备用方法找到公告数量: $noticeCount\n";
} }
var_dump($allData);die; //var_dump($allData);die;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment