Commit c66a8490 by 白满斌

详情

parent 503eaa36
......@@ -37,61 +37,6 @@ class BatchDetail extends Command
protected $dealRecordModel;
// Exam category => URL path suffix.
// Original note on the layout: "province-city-0-exam type-recruitment announcement".
// Every active value starts with "-" and ends in "-124"; only the exam-type number differs.
// NOTE(review): presumably the caller prepends the province ID to the suffix — confirm.
public static $ksType = [
// Disabled categories (civil servant, public institution, teacher):
// '公务员' => '-0-0-2-124',
// "事业单位" => "-0-0-3-124",
// "教师" => "-0-0-59-124",
"医疗" => "-0-0-60-124", // medical
"选调" => "-0-0-7-124", // selected transfer
"遴选" => "-0-0-63-124", // open selection
"选调生" => "-0-0-62-124", // selected graduates
"三支一扶" => "-0-0-8-124", // "three supports and one assistance" program
"大学生村官" => "-0-0-9-124", // college-graduate village officials
"基层工作者" => "-0-0-66-124", // grassroots workers
"银行" => "-0-0-67-124", // banks
"国企" => "-0-0-78-124", // state-owned enterprises
"公益性岗位" => "-0-0-80-124", // public-welfare posts
"军队文职" => "-0-0-249-124", // military civilian staff
];
// Numeric area ID => region display name.
// The first entry is "national"; the last three are Hong Kong, Macau and Taiwan.
// NOTE(review): the IDs look like the remote site's own region identifiers — confirm.
public static $ksArea = [
3510 => "国家",
1117 => "安徽",
1 => "北京",
1255 => "福建",
2129 => "广东",
3191 => "甘肃",
2290 => "广西",
2723 => "贵州",
37 => "河北",
1849 => "湖北",
705 => "黑龙江",
1654 => "河南",
2429 => "海南",
1979 => "湖南",
627 => "吉林",
878 => "江苏",
1359 => "江西",
498 => "辽宁",
374 => "内蒙古",
3357 => "宁夏",
3304 => "青海",
2500 => "四川",
1482 => "山东",
859 => "上海",
232 => "山西",
3063 => "陕西",
19 => "天津",
2980 => "西藏",
3390 => "新疆",
2826 => "云南",
1004 => "浙江",
2460 => "重庆",
3508 => "香港",
3509 => "澳门",
3507 => "台湾",
];
protected $client;
......@@ -148,30 +93,36 @@ class BatchDetail extends Command
// 13164251418:0f196df3dcfbb78b83a4e1d18fc9fe9611d155dxb81cdeo6jkh9wuv81cdnap4evmis78 -- 9047780
// 17310088217:cbf6a15cf45c95f89a664f3534f64eae11d15fnc6h1cdetb9sj5pczd1cdnftumxibabd -- 26283113
// junjie:f65258b207d7f3a8af87063d74531ba211d15fliu41cdev1qkuochan1cdnhkbf90yemn -- 26198428
// 18600962376 0be20c74eeb01a059f2eecedcd0595f611d15cg6vm1cdeu6nwgd9voy1cdngp8qupvt0y -- 20910802 850
// bmb1:3d24d79a5420c8fe1af5d74f71fc72a211d15fk7z41cdeu6tzo6fsl41cdngpeu2j1px4 -- 26137696 web: day1: 745 day2: 157 day2: ios:102 封了
// 18842658542达:2e194a3f660309a9aebf4df66c64770d11d15fnwif1cdevv8kcx4c921cdnidter9q9l2 -- 26309463
/** app 接口 start */
// $param = [
// 'remindId'=>0,
// 'articleId'=>$articleId,
// 'from_device'=>'ios',
// 'timestamp'=>time(),
// 'appid'=>'8tQhhq32HIxOH72p',
// 'osVersion'=>'29',
// 'devVersion'=>'6.2.0.0',
// 'appLaunchChannel'=>'default',
// 'token'=>'3d24d79a5420c8fe1af5d74f71fc72a211d15fk7z41cdeu6tzo6fsl41cdngpeu2j1px4',
// 'userId'=>'26137696',
// ];
//
// ksort($param);
// $argStr = '';
// foreach ($param as $key => $val) {
// $argStr .= $key . "=" . trim($val, ' ') . "&";
// }
// $argStr2 = $argStr.'secret=GO1GgDeC1qIr7KcuGfZIGTHk5R3RM1KT';
//
// $signStr2 = md5(urlencode($argStr2));
//
// $url = 'https://api.gongkaoleida.com/api/v6_0_3_0/article/detail?'.$argStr.'&sign='.$signStr2;
$param = [
'remindId'=>0,
'articleId'=>$articleId,
'from_device'=>'ios',
'timestamp'=>time(),
'appid'=>'8tQhhq32HIxOH72p',
'osVersion'=>'29',
'devVersion'=>'6.2.0.0',
'appLaunchChannel'=>'default',
'token'=>'f65258b207d7f3a8af87063d74531ba211d15fliu41cdev1qkuochan1cdnhkbf90yemn',
'userId'=>'26198428',
];
ksort($param);
$argStr = '';
foreach ($param as $key => $val) {
$argStr .= $key . "=" . trim($val, ' ') . "&";
}
$argStr2 = $argStr.'secret=GO1GgDeC1qIr7KcuGfZIGTHk5R3RM1KT';
$signStr2 = md5(urlencode($argStr2));
$url = 'https://api.gongkaoleida.com/api/v6_0_3_0/article/detail?'.$argStr.'&sign='.$signStr2;
/** app 接口 end */
try {
......@@ -190,23 +141,24 @@ class BatchDetail extends Command
dd($value['id'], $content);
}
$this->saveRetToFile(json_encode($content), $value['id']);
// $sourceData = $content['data']['articleInfo'] ?? [];
// if(empty($sourceData)){
// dd('数据异常:'.$value['id']);
// }
// $from_url = $sourceData['sourcePageUrl'];
// $from_title = $sourceData['origin'];
// $from_detail = json_encode($sourceData);
//
// $updateData = [
// 'from_url' => $from_url,
// 'from_title' => $from_title,
// 'from_detail' => $from_detail,
// ];
// $LeidaModel->updateData(['id'=>$value['id']], $updateData);
dd($content);
/** app 接口 start */
// $this->saveRetToFile(json_encode($content), $value['id']);
/** app 接口 end */
$sourceData = $content['data']['articleInfo'] ?? [];
if(empty($sourceData)){
dd('数据异常:'.$value['id']);
}
$from_url = $sourceData['sourcePageUrl'];
$from_title = $sourceData['origin'];
$from_detail = json_encode($sourceData);
$updateData = [
'from_url' => $from_url,
'from_title' => $from_title,
'from_detail' => $from_detail,
];
$LeidaModel->updateData(['id'=>$value['id']], $updateData);
echo 'done:'.$value['id'].' total:'.$total.' ';
......
......@@ -53,37 +53,37 @@ class BatchExportUserPhone extends Command
];
public static $ksArea = [
// 3510 => "国家",
// 1117 => "安徽",
// 1 => "北京",
// 1255 => "福建",
// 2129 => "广东",
// 3191 => "甘肃",
// 2290 => "广西",
// 2723 => "贵州",
// 37 => "河北",
// 1849 => "湖北",
// 705 => "黑龙江",
// 1654 => "河南",
// 2429 => "海南",
// 1979 => "湖南",
// 627 => "吉林",
// 878 => "江苏",
// 1359 => "江西",
// 498 => "辽宁",
// 374 => "内蒙古",
// 3357 => "宁夏",
// 3304 => "青海",
// 2500 => "四川",
// 1482 => "山东",
// 859 => "上海",
// 232 => "山西",
// 3063 => "陕西",
// 19 => "天津",
// 2980 => "西藏",
// 3390 => "新疆",
// 2826 => "云南",
// 1004 => "浙江",
3510 => "国家",
1117 => "安徽",
1 => "北京",
1255 => "福建",
2129 => "广东",
3191 => "甘肃",
2290 => "广西",
2723 => "贵州",
37 => "河北",
1849 => "湖北",
705 => "黑龙江",
1654 => "河南",
2429 => "海南",
1979 => "湖南",
627 => "吉林",
878 => "江苏",
1359 => "江西",
498 => "辽宁",
374 => "内蒙古",
3357 => "宁夏",
3304 => "青海",
2500 => "四川",
1482 => "山东",
859 => "上海",
232 => "山西",
3063 => "陕西",
19 => "天津",
2980 => "西藏",
3390 => "新疆",
2826 => "云南",
1004 => "浙江",
2460 => "重庆",
// 3508 => "香港",
// 3509 => "澳门",
......
......@@ -7,6 +7,9 @@ use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use Illuminate\Support\Facades\Cache;
use App\Http\Services\Decryptor;
use Illuminate\Support\Facades\Storage;
use App\Models\LeidaModel;
class test extends Command
{
protected $signature = 'crawl:dynamic-token';
......@@ -20,298 +23,66 @@ class test extends Command
public function handle()
{
// 示例加密数据
$encryptedData = "K0retCPdI@kG41DJ5wRYc-W8RT9NsePsLMAknEnd4g7KeOxkrlicfiHfb*tts8VRwSzrjpguE1CYpfZH0\/8epwIayZrEoL5qFWBbhaBhzwLmYvuqRdBhQdWJ8yHbo362R8iHfiD+q66DYjwMZp2WPKtCjzQVtGHXU1qdFWm\/JSFeDjuYk8ZRivh0TTuuqUjlek+2RF8e16bIVFfScB+cLrImNFo1l\/fL7bPtpFDyZRdWVXnKZ\/iRLiLZAiAtTqB6VgpanceHU69NMLwqn3p7fQ6rqeZc6qINxZSOaMf53S\/Yqw2J5g2PzlQ+xpyhVv\/WiuIATqFy1ZWWWg9UeHbWa3PAywmZ201dvOMZatR03RC\/CsOVRKYNkFVYoIg7ECPKDMdZh70R9vwWYqivFde07G8+iStPC\/egJMp64GodBh2aKqwyv8\/3Hmy8UCB8zPobJGeKHE0nNUUpcCsBAMXKs4lQWqtyISFTXqEcXB\/ciISWjSN68k0gdCEoE0xDOdIayWsAS4ptwRTz\/SCNz7GHgcSIzar3RsKXVlvo\/R9jVCRekR5\/hLIovp\/phgHRMiKBSKp9\/HrvwN0rpsZULDy7iwmkX95C4RCWs8JDo3dczdeoVJz9HLCdInF9Fsi3K7+pEeTxNtL5pEBnjclvE1LCL9Nz+dGtt5RfIWADUyQkzQcbqvaPz8\/FTIINMC9hYYgTOx93nurSdG1bgjZTgMxxcGsHEGgxuGAc7U+hxRdALOHQbiFr9K1h4+PN1Ik79T6POvtoowPTO92goA+muISy8DWls2Whal9\/TFlAvqXI1rRzbJ6EU8GWHl1qtGRBoV7vGFanfbWPubOh1A74i2VysQdQJXh0wuHDU88h5otDNuvLLs39yhYVJfZJQCM9QnlLgEHtxU7sWZP+39pCDzn33mU6YxTxp6CkzJeWUoy555LtzSoHvwmOwQyiDG7XhnHYt41WYO7mTT7B+eJH7ev1mHoqPUAuFZa+yv27V5FifE8+\/3LjNdZ+tfXdCRAdOEn7H7gw\/MVuBpuFaVlbtSUiNNksVTzbCRD9xMzuTvClvJwWmS9q\/Iu37yx7CyPiHEG\/0KSVOKEFn8GPKjpjw2FoQO8ScDFKLclhNIzo0qIUAdmSHOnn8VzDFUrfNgTON1sDSQ2gA\/FTYNYl+oaZL1l57LoCviaJHdbcGQZ2qZbsqmPGaF5BkvwMJ6bWJWDpMA\/8VcpPYj\/mvRiA+aJfeB\/U61YwNDGJ17Ac8SGOCBItL5eEchk993Mq2FR\/ziKNKZb10r5IRsgns5pPn7GoJT\/83vyprEQuMrNAu7xuL\/pcda1xTH35eFS9E5A\/CvdxeYgjCGl0gvpSOAxCSF5HtlFgZ\/PCAVyTMyKMZbKwdxA2LPZ+nQpM19mwRKiKvo57Giu74wCjUhvz\/95kAUWdsgQUa6dYsDPuqKhL5Qr6dN2DNojpb8WLZ3zhZIwPZHWIF5ogqiQwZPwlryadIo1tqveqZbTQ0SOJ8HJZ0DuDTWUXrzAQPWgCw\/\/6aWtSmuOgUGDBeVM2BxK77LUHeYngIuKyb4wiPymx8OHo\/KZWTymcmyrLi5+pQMsoITVgq5oDNZpUSFzIfnqqjduplmszyKphHddmfcBqb5qFaeSjnH7XQNWmYiigo8moCN3KveJHcoBcPnfJiyaOjfvpLaUgnuZYbAnIW5bayPQZKiIphTB1CnMN9GQOwuOlhcF3\/dMdIyjofne7wzKPIm1dgaQtGOvfdU5NZYzmczniHwhDuQpLs0J8TZg1\/DZ1Y6HuhMDH6hycOm7LTE+XvDaQLrKWpKT5\/ObVNMQWtQsyTSuXuvnYqJV6+8DUvs70CmislkNa4LeCLisQE2F1hwbBcMR9ZY4Wuw9MWnaqoYwm8z5GYZkVoKta125fk\/GcdmxvNfX2GT8iUAwIaubr0u2BXKV4ncIIUAhk
W4kynIfYY\/18eTmrpaZUVhfcuFSZvSYWQEDUqPJM8s+8rnp6uMMCpVIGw3l5+PW2gkOMC1ncbpGvB4MXSoILrMQQXcj2fzQ8T\/uuZAwqZgGjuE141PuHRd4aJp8MCac5rJIQ9aE1wLzheZHVqG6602TaugjPray74ztjb5XgPzNOvZqF2HpUuAogyVYza++1BuN7uCAV0ofCdrA73iILOKZm0j0+mOi94TngskhdfoX3opS1Pjv17hSH3Jyg29B\/6JG7FHKzuC+78HRBxdxS16mjTG\/6d+rv\/CzBq5YKkJhe7uhy3v1IGrK7d0S9LZklJmcF+jIO8vXc6VHsaytmi5zXB\/+dUAExuz02ubupFFdl\/mw4xyTaG1BTcw+ScRihz7ai0IeC7e8T1RsFsj5ZlWIT6GDyf9TQnesgpOViCa\/kfcJb6LwJF3EN0T4pJYWHtlZ5lGcF6sB3fCMkk1Ry+LDVrtyQqLsSyuOV\/\/Hl3bW16ikfyYMRDlJZE+8n0vAfJcyN5\/5s4kUb2Ir4gIIZp5O8N7JKvGJRiw9XNiK\/qlHFUVrZOIFeWGR0akiGp0OM7\/kHs4QsZnwUp3bxLk4Nu4w5fTurCTOyjdJ+I6pGjnhjTAZW32NGkqrythcOfppcfjsw\/BuODICaidoGVUUtJruyGpqqomehxGQiueJXsAWOAFrDJ5OulO79kcIKxop7UOE8DQUAZnQOBeQe0UmkgN6WoRcQ41wRP1w19j9b1apw4k\/j\/MYefPFMTfaJiTQNPO5MKo3uqk8nwx3tXJATmxe+F+wYig9ZrdsmEWs5kt+RCY0xISn9K02\/S3WacQArfmj02vdsyzBrUBkP0JrZCo6H7VCdIrFdAk1nsB67+DzkDTrJwlnb7G0OUnMH8QBv5oPHU2CJc\/YHxme+zEszsg6MEedNdrEM+oPEsP3LY2mlqWcSN6GljqqctBSk0QDEBA84W9B+Y1whW09w46isyLsulrctBu8ETqvpeATEeFJWHvirVXVYv3er1+H84KV2uZxQXRYv15Sbq44\/msseWRDsdVcmP5ibihwAOFUOrr9DwnR+urX5CoSc\/EwJJ+t6WV+WWziUY4P2h\/52txc4dlB1meT8x0Jy9iWmMtTsNKv\/xscsv7jyrmoSfaKEBjWcUvkNi0iJjffQP55qxNbQnk4aUh42u7euSXjgS3YKwCdf+J3Tyif1JDEgd02hNhdy+bQ6i3WiDPZ8hVwcJHlAEY7E+VZ5Qc9zUyNaV6G1CjtAp4oiEYQgXKL55DbmMAD+UHeWCNf5jIPG0dZcdeSTTeDFBKLICVRSLxjIsIMYNwoISWmVoSJJRbR42c4Y6XlppE7gbAqUsr+dUiq2jrb70xCrlu5DJyOQZXOAp2WocSJEJsA6Wmduv10XPQRMm\/SSwVWgtRwlHszr5pBZUvqYMkggA6xzDeJ9YsqKf99x6C3gHROgeeS6uRiWVZtyVh6p7uHTtxG7aL6qEaQwsyheC09tI+3Yr5isaLxVBUDLYtG6mjaqKdNImtGEj9WxO9kuEZmqdBE4Au40jGGMRm5OTF+uTKdTzN2z6ZSJAv+\/EjZULRHmpkShdiWaH5qvJ7PxlxYpQ==";
try {
// 方法1:简单解密
$decrypted = Decryptor::decrypt($encryptedData);
// 方法2:获取详细结果
$result = Decryptor::decryptWithResult($encryptedData);
dd($result);
if ($result['success']) {
dd([
'success' => true,
'data' => json_decode($result['data'], true), // 假设返回JSON
'original' => $encryptedData
]);
} else {
dd([
'success' => false,
'error' => $result['error'],
'exception' => $result['exception'] ? $result['exception']->getMessage() : null
], 400);
}
} catch (\Exception $e) {
dd([
'success' => false,
'error' => 'Decryption failed',
'message' => $e->getMessage()
], 500);
}
return 0;
}
/**
 * Create the cookie jar and the HTTP client used by this command.
 *
 * The client is rooted at $this->baseUrl, stores cookies in $this->cookieJar,
 * uses a timeout of 30 and sends browser-like Accept headers by default.
 */
private function initializeClient(): void
{
    $jar = new CookieJar();

    // Default headers sent with every request made through the client.
    $defaultHeaders = [
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language' => 'zh-CN,zh;q=0.9',
        'Accept-Encoding' => 'gzip, deflate, br',
    ];

    $this->cookieJar = $jar;
    $this->client = new Client([
        'base_uri' => $this->baseUrl,
        'cookies' => $jar,
        'timeout' => 30,
        'headers' => $defaultHeaders,
    ]);
}
/**
 * Start a session.
 *
 * Fetches the home page, extracts the initial tokens from that response,
 * then calls activateSession() and reports progress on the console.
 */
private function startSession(): void
{
    $this->info('启动会话...');

    // Load the home page and pull the initial tokens out of the response.
    $this->extractTokensFromResponse($this->client->get('/'));

    // Call the API endpoint that activates the session.
    $this->activateSession();

    $this->info('会话启动完成');
}
/**
 * Extract tokens from a response and store them in $this->sessionData.
 *
 * - XSRF-TOKEN cookie   -> 'xsrf_token' (raw) and 'xsrf_decoded' (urldecoded)
 * - gkld_session cookie -> 'session'
 * - <meta name="csrf-token"> in the HTML body -> 'csrf_token'
 *
 * Cookies are read from $this->cookieJar, not from the response itself.
 * The response body stream is rewound after reading so that callers can
 * still read the body afterwards.
 *
 * @param mixed $response Response object exposing getBody() (PSR-7 style).
 */
private function extractTokensFromResponse($response): void
{
    // Tokens delivered as cookies.
    foreach ($this->cookieJar->toArray() as $cookie) {
        if (!isset($cookie['Name'])) {
            continue;
        }

        $name = $cookie['Name'];
        $value = $cookie['Value'] ?? '';

        if ($name === 'XSRF-TOKEN') {
            $this->sessionData['xsrf_token'] = $value;
            $this->sessionData['xsrf_decoded'] = urldecode($value);
        } elseif ($name === 'gkld_session') {
            $this->sessionData['session'] = $value;
        }
    }

    // CSRF token embedded in the HTML.
    // getContents() only returns what is left in the stream and leaves the
    // pointer at the end, so a later read by the caller would see an empty
    // body. Read the whole body via the string cast and rewind afterwards.
    $stream = $response->getBody();
    $body = (string) $stream;
    if ($stream->isSeekable()) {
        $stream->rewind();
    }

    if (preg_match('/<meta name="csrf-token" content="([^"]+)"/', $body, $matches)) {
        $this->sessionData['csrf_token'] = $matches[1];
    }

    // Report the current token state.
    $this->logTokenInfo();
}
/**
 * Activate the session.
 *
 * Sends an AJAX-style GET to /api/session/keepalive carrying the current
 * CSRF/XSRF tokens (empty strings when none are known yet), then refreshes
 * the stored tokens from the response. Any exception is deliberately
 * ignored: this is a best-effort call.
 */
private function activateSession(): void
{
    try {
        // Send an AJAX request to simulate user activity.
        $response = $this->client->get('/api/session/keepalive', [
            'headers' => [
                'X-Requested-With' => 'XMLHttpRequest',
                'X-CSRF-TOKEN' => $this->sessionData['csrf_token'] ?? '',
                'X-XSRF-TOKEN' => $this->sessionData['xsrf_decoded'] ?? '',
            ],
        ]);

        // Update the stored tokens.
        $this->extractTokensFromResponse($response);
    } catch (\Exception $e) {
        // Ignore errors: not every site exposes this endpoint.
    }
}
// dd(count($files));
// 2000-8000
/**
 * Smart request (handles token refresh automatically).
 *
 * Performs a GET on $url with the headers from getRequestHeaders(). On an
 * exception the attempt counter is incremented, the token is refreshed when
 * isTokenError() says the failure is token related, and the request is
 * retried after sleeping 2^attempt seconds (exponential backoff).
 *
 * @param string $url        URL to fetch.
 * @param int    $maxRetries Maximum number of attempts.
 *
 * @return array On success: ['success' => true, 'status' => int, 'content' => string].
 *               On failure: ['success' => false, 'error' => string].
 */
private function smartRequest(string $url, int $maxRetries = 3): array
{
    $retryCount = 0;

    while ($retryCount < $maxRetries) {
        try {
            // Make sure the token is still valid.
            $this->ensureTokenValid();

            $response = $this->client->get($url, [
                'headers' => $this->getRequestHeaders($url),
            ]);

            // Read the body once, before anything else touches the stream,
            // and rewind so the token extraction below sees the full body too.
            $stream = $response->getBody();
            $content = (string) $stream;
            if ($stream->isSeekable()) {
                $stream->rewind();
            }

            // Update the stored tokens.
            $this->extractTokensFromResponse($response);

            return [
                'success' => true,
                'status' => $response->getStatusCode(),
                'content' => $content,
            ];
        } catch (\Exception $e) {
            $retryCount++;

            // Token-related failure: refresh the token before retrying.
            if ($this->isTokenError($e)) {
                $this->refreshToken();
            }

            if ($retryCount >= $maxRetries) {
                return [
                    'success' => false,
                    'error' => $e->getMessage(),
                ];
            }

            sleep(pow(2, $retryCount)); // exponential backoff
        }
    }

    return ['success' => false, 'error' => 'Max retries exceeded'];
}
/**
 * Make sure the token is valid.
 *
 * Compares the time elapsed since the cached 'token_last_update' timestamp
 * (0 when nothing is cached) with the command's 'interval' option and calls
 * refreshToken() once that interval has been reached.
 */
private function ensureTokenValid(): void
{
    $refreshedAt = Cache::get('token_last_update', 0);
    $maxAge = $this->option('interval');
    $age = time() - $refreshedAt;

    // Still inside the allowed interval: nothing to do.
    if ($age < $maxAge) {
        return;
    }

    $this->refreshToken();
}
/**
 * Refresh the token.
 *
 * Requests '/', extracts the tokens from the response and stores the current
 * time under the 'token_last_update' cache key (third argument to Cache::put
 * is 300). Failures are reported on the console instead of being rethrown.
 */
private function refreshToken(): void
{
    $this->info('刷新token...');

    try {
        // Request a lightweight page and harvest its tokens.
        $this->extractTokensFromResponse($this->client->get('/'));

        Cache::put('token_last_update', time(), 300);

        $this->info('Token刷新完成');
    } catch (\Exception $failure) {
        $this->error('刷新token失败: ' . $failure->getMessage());
    }
}
/**
 * Build the request headers.
 *
 * Starts from a fixed browser-like header set (Referer is $this->baseUrl
 * followed by '/') and adds X-CSRF-TOKEN / X-XSRF-TOKEN when the matching
 * entries in $this->sessionData are non-empty.
 *
 * @param string $url Target URL (currently not used when building headers).
 *
 * @return array Header name => value.
 */
private function getRequestHeaders(string $url): array
{
    $headers = [
        'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language' => 'zh-CN,zh;q=0.9',
        'Accept-Encoding' => 'gzip, deflate, br',
        'Referer' => $this->baseUrl . '/',
    ];

    // Add the CSRF token.
    if (!empty($this->sessionData['csrf_token'])) {
        $headers['X-CSRF-TOKEN'] = $this->sessionData['csrf_token'];
    }

    // Add the XSRF token.
    if (!empty($this->sessionData['xsrf_decoded'])) {
        $headers['X-XSRF-TOKEN'] = $this->sessionData['xsrf_decoded'];
    }

    return $headers;
}
/**
 * Check whether an exception looks like a token error.
 *
 * The check is a case-sensitive substring match on the exception message
 * for '419', 'CSRF' or 'Token'.
 *
 * @param \Exception $e Exception raised by a request.
 *
 * @return bool True when the message contains one of the markers.
 */
private function isTokenError(\Exception $e): bool
{
    $message = $e->getMessage();

    return strpos($message, '419') !== false
        || strpos($message, 'CSRF') !== false
        || strpos($message, 'Token') !== false;
}
/**
 * Log token information.
 *
 * Prints the first 20 bytes (followed by '...') of each token currently held
 * in $this->sessionData: csrf_token, xsrf_token and session.
 */
private function logTokenInfo(): void
{
    $info = [];

    if (isset($this->sessionData['csrf_token'])) {
        $info['CSRF Token'] = substr($this->sessionData['csrf_token'], 0, 20) . '...';
    }

    if (isset($this->sessionData['xsrf_token'])) {
        $info['XSRF Token'] = substr($this->sessionData['xsrf_token'], 0, 20) . '...';
    }

    if (isset($this->sessionData['session'])) {
        $info['Session'] = substr($this->sessionData['session'], 0, 20) . '...';
    }

    $this->info('当前Token状态:');
    foreach ($info as $key => $value) {
        $this->line(" {$key}: {$value}");
    }
}

/**
 * Save HTML content to a file.
 *
 * Writes $html to storage_path('app/crawled/<id>.txt'), creating the
 * directory first when it does not exist.
 *
 * @param string $html Content to write.
 * @param mixed  $id   Identifier used as the file name (without extension).
 *
 * @return string Full path of the written file.
 */
public function saveRetToFile(string $html, $id): string
{
    $filename = $id . '.txt';
    $path = storage_path('app/crawled/' . $filename);

    // Make sure the directory exists.
    if (!is_dir(dirname($path))) {
        mkdir(dirname($path), 0755, true);
    }

    file_put_contents($path, $html);

    return $path;
}
/**
 * Handle a successful result.
 *
 * Prints the status code and the content length in bytes, and stores the
 * content via saveResult() when the 'save' option is set.
 *
 * @param array $result Expects the keys 'status' and 'content'.
 */
private function processResult(array $result): void
{
    $this->info('爬取成功!');

    $status = $result['status'];
    $this->info('状态码: ' . $status);

    $byteCount = strlen($result['content']);
    $this->info('内容长度: ' . $byteCount . ' 字节');

    // Nothing more to do unless saving was requested.
    if (!$this->option('save')) {
        return;
    }

    $this->saveResult($result['content']);
}
/**
 * Save the result.
 *
 * Writes $content to storage_path('app/crawled/<Ymd_His>.html') and prints
 * the resulting path on the console.
 *
 * @param string $content Content to write.
 */
private function saveResult(string $content): void
{
    $filename = storage_path('app/crawled/' . date('Ymd_His') . '.html');
    file_put_contents($filename, $content);

    $this->info("结果已保存到: {$filename}");
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment