logo
0
1
WeChat Login
# 1. Install Python 3 (if needed)
sudo apt update && sudo apt install -y python3 python3-pip python3-venv

# 2. Enter the project directory
cd /workspace

# 3. Create a virtual environment with uv
uv venv --python 3.11

# 4. Activate the virtual environment
source .venv/bin/activate

# 5. Install dependencies
uv pip install -r requirements.txt
uv pip install -e .

# 6. Run the application
python app.py

originVideoKey 定位方法和完整JSON

定位方法代码片段

import json
import re


def extract_origin_video_key(url, html=None):
    """Extract the originVideoKey of a Xiaohongshu video page.

    Args:
        url: Share/page URL. It is fetched with a mobile User-Agent when
            ``html`` is not supplied.
        html: Optional, already-downloaded page HTML. When given, no HTTP
            request is made and ``url`` is ignored.

    Returns:
        A dict with ``"originVideoKey"`` (escapes decoded) and
        ``"url_without_watermark"``, or ``None`` when the response status is
        not 200 or the key is not present in the HTML.
    """
    if html is None:
        # Imported lazily so that parsing pre-fetched HTML does not require
        # the third-party ``requests`` package to be installed.
        import requests

        # A mobile User-Agent is essential: the desktop page reportedly does
        # not embed originVideoKey.
        mobile_headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1'
        }
        r = requests.get(url, headers=mobile_headers, timeout=30)
        if r.status_code != 200:
            return None
        html = r.text

    # Match the quoted value, allowing backslash escapes (including \")
    # inside it so the capture is never cut short.
    match = re.search(r'"originVideoKey"\s*:\s*"((?:[^"\\]|\\.)+)"', html)
    if not match:
        return None

    # Replace the escaped slash textually first (this also copes with a
    # doubly-escaped "\\u002F"), then decode any remaining JSON escapes.
    raw_key = match.group(1).replace('\\u002F', '/')
    try:
        origin_video_key = json.loads(f'"{raw_key}"')
    except ValueError:
        # Not a valid JSON string body; keep the slash-fixed text as-is.
        origin_video_key = raw_key

    # Build the watermark-free video URL.
    url_without_watermark = f"http://sns-video-bd.xhscdn.com/{origin_video_key}"
    return {
        "originVideoKey": origin_video_key,
        "url_without_watermark": url_without_watermark
    }


# Usage example
if __name__ == "__main__":
    result = extract_origin_video_key("http://xhslink.com/o/AEc2QuZgPnW")
    print(result)

完整JSON结构示例

originVideoKey 位于HTML中的 video.media.consumer 对象内,完整结构如下:

{ "video": { "media": { "stream": { "h264": [ { "width": 1280, "videoCodec": "h264", "videoBitrate": 478960, "audioChannels": 2, "vmaf": -1, "streamDesc": "MINI_APP_259", "defaultStream": 0, "avgBitrate": 547696, "streamType": 259, "audioDuration": 133000, "rotate": 0, "weight": 62, "videoDuration": 133000, "masterUrl": "http://sns-video-hw.xhscdn.com/stream/79/110/259/01e9169fca2b5717010370039a8060f17b_259.mp4", "backupUrls": [ "http://sns-bak-v1.xhscdn.com/stream/79/110/259/01e9169fca2b5717010370039a8060f17b_259.mp4", "http://sns-bak-v6.xhscdn.com/stream/79/110/259/01e9169fca2b5717010370039a8060f17b_259.mp4" ], "ssim": 0, "format": "mp4", "audioCodec": "aac", "height": 720, "duration": 133000, "volume": 0, "psnr": 0, "size": 9105458, "fps": 24, "audioBitrate": 64057, "hdrType": 0, "qualityType": "HD" } ], "h265": [ { "audioDuration": 133000, "masterUrl": "http://sns-video-hw.xhscdn.com/stream/79/110/114/01e9169fca2b57174f0370019a806214ac_114.mp4", "psnr": 50.132999420166016, "qualityType": "HD", "width": 1280, "volume": 0, "avgBitrate": 430781, "vmaf": -1, "defaultStream": 0, "size": 7161742, "backupUrls": [ "http://sns-bak-v1.xhscdn.com/stream/79/110/114/01e9169fca2b57174f0370019a806214ac_114.mp4", "http://sns-bak-v6.xhscdn.com/stream/79/110/114/01e9169fca2b57174f0370019a806214ac_114.mp4" ], "streamDesc": "X265_MP4_WEB_114_h5", "videoDuration": 133000, "duration": 133000, "audioBitrate": 128115, "audioChannels": 2, "streamType": 114, "height": 720, "videoBitrate": 297904, "weight": 62, "fps": 24, "videoCodec": "hevc", "rotate": 0, "hdrType": 0, "format": "mp4", "ssim": 0, "audioCodec": "aac" } ], "h266": [], "av1": [] }, "videoId": 137666139158959890, "video": { "streamTypes": [259, 114], "bizName": 110, "bizId": "281781327238692381", "duration": 133, "md5": "8513e3b4c4e36cff57fe6078cd1b1aff", "hdrType": 0, "drmType": 0 }, "image": { "thumbnailFileid": "frame/110/0/01e9169fca2b57170010000000019a8060b37f_0.webp" }, "capa": { "duration": 133 }, "consumer": { 
"originVideoKey": "spectrum/1040g35031orot7m514105q062k5lvrg2g3vh458" } } } }

关键点说明

  1. 必须使用手机版User Agent - PC版HTML中不包含originVideoKey
  2. originVideoKey位置 - 在video.media.consumer.originVideoKey路径下
  3. 无水印视频URL构造 - http://sns-video-bd.xhscdn.com/{originVideoKey}
  4. 转义字符处理 - HTML中的\u002F需要替换为/

示例输出

{ "originVideoKey": "spectrum/1040g35031orot7m514105q062k5lvrg2g3vh458", "url_without_watermark": "http://sns-video-bd.xhscdn.com/spectrum/1040g35031orot7m514105q062k5lvrg2g3vh458" }

获取consumer对象的完整代码片段

从HTML中提取consumer对象

import json
import re


def _find_closing_brace(text, open_idx):
    """Return the index just past the ``}`` that closes the ``{`` at open_idx.

    Braces inside double-quoted strings are ignored. Raises ValueError when
    the text ends before the object is closed.
    """
    depth = 0
    in_string = False
    escaped = False
    for idx in range(open_idx, len(text)):
        ch = text[idx]
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return idx + 1
    raise ValueError("unbalanced braces in consumer object")


def get_consumer_object_from_html(url, html=None):
    """Extract the complete ``consumer`` object that holds originVideoKey.

    Steps:
        1. Fetch the HTML with a mobile User-Agent (skipped when ``html`` is given).
        2. Locate originVideoKey.
        3. Search backwards (at most 5000 characters) for ``"consumer": {``.
        4. Scan forwards for the matching closing brace.
        5. Slice out and parse the consumer object.

    Args:
        url: Share/page URL, fetched when ``html`` is not supplied.
        html: Optional, already-downloaded page HTML. When given, no HTTP
            request is made and ``url`` is ignored.

    Returns:
        A dict with ``"consumer_raw"`` (the text exactly as found in the
        HTML), ``"consumer_data"`` (the parsed object) and
        ``"originVideoKey"``; or ``None`` when the response status is not
        200, the key or the consumer object cannot be located, or the
        object cannot be parsed (a message is printed in that last case).
    """
    # Step 1: fetch the HTML with a mobile User-Agent.
    if html is None:
        # Imported lazily so that parsing pre-fetched HTML does not require
        # the third-party ``requests`` package to be installed.
        import requests

        mobile_headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1'
        }
        r = requests.get(url, headers=mobile_headers, timeout=30)
        if r.status_code != 200:
            return None
        html = r.text

    # Step 2: locate originVideoKey.
    key_match = re.search(r'"originVideoKey"\s*:\s*"([^"]+)"', html)
    if not key_match:
        return None
    pos = key_match.start()

    # Step 3: search backwards (up to 5000 characters) for `"consumer": {`
    # and keep the occurrence closest to originVideoKey.
    search_start = max(0, pos - 5000)
    consumer_matches = list(
        re.finditer(r'"consumer"\s*:\s*\{', html[search_start:pos])
    )
    if not consumer_matches:
        return None
    consumer_match = consumer_matches[-1]
    json_start = search_start + consumer_match.start()
    # The pattern ends with "{", so the object starts at its last character.
    # Using this index (instead of stripping a fixed-width '"consumer":'
    # prefix) keeps parsing correct when whitespace surrounds the colon.
    brace_idx = search_start + consumer_match.end() - 1

    try:
        # Step 4: find the matching closing brace.
        json_end = _find_closing_brace(html, brace_idx)

        # Step 5: slice out the raw text and parse the object itself,
        # replacing the escaped slash before decoding.
        consumer_json_str = html[json_start:json_end]
        fixed_json = html[brace_idx:json_end].replace('\\u002F', '/')
        consumer_data = json.loads(fixed_json)

        return {
            "consumer_raw": consumer_json_str,  # raw string as found in the HTML
            "consumer_data": consumer_data,  # parsed JSON object
            "originVideoKey": consumer_data.get("originVideoKey", "").replace('\\u002F', '/')
        }
    except (ValueError, AttributeError) as e:
        # ValueError: unbalanced braces or invalid JSON.
        # AttributeError: originVideoKey is present but not a string.
        print(f"无法解析JSON: {e}")
        return None


# Usage example
if __name__ == "__main__":
    result = get_consumer_object_from_html("http://xhslink.com/o/AEc2QuZgPnW")

    if result:
        print("HTML中的原始consumer对象:")
        print(result['consumer_raw'])
        print("\n解析后的consumer对象:")
        print(json.dumps(result['consumer_data'], indent=2, ensure_ascii=False))
        print("\noriginVideoKey:", result['originVideoKey'])

输出示例

HTML中的原始consumer对象: "consumer":{"originVideoKey":"spectrum\\u002F1040g35031orot7m514105q062k5lvrg2g3vh458"} 解析后的consumer对象: { "originVideoKey": "spectrum/1040g35031orot7m514105q062k5lvrg2g3vh458" } originVideoKey: spectrum/1040g35031orot7m514105q062k5lvrg2g3vh458

代码说明

  1. 定位originVideoKey - 使用正则表达式"originVideoKey"\s*:\s*"([^"]+)"找到位置
  2. 向前搜索consumer - 从originVideoKey位置向前查找"consumer": {
  3. 平衡大括号 - 向后查找对应的闭合大括号}
  4. 处理转义字符 - 将\u002F替换为/
  5. 解析JSON - 去掉键名后解析为JSON对象

About

小红书无水印图片py https://iecho.cc/2024/03/03/decode-xiaohongshu-video-url/

Language
Python95.4%
C1.9%
HTML1.5%
C++0.4%
Others0.8%