# Basic chat completion: a system message sets the assistant's role and a
# user message carries the actual request.
conversation = [
    {
        "role": "system",
        "content": "You are a helpful assistant that writes product descriptions.",
    },
    {
        "role": "user",
        "content": "Write a 50-word description for a portable Bluetooth speaker.",
    },
]

response = client.chat.completions.create(
    model="qwen-plus",
    messages=conversation,
    temperature=0.7,
    max_tokens=200,
)

# Only the first choice is read.
first_choice = response.choices[0]
print(first_choice.message.content)

# Report how many tokens the request consumed on each side.
usage = response.usage
print(f"Tokens used: {usage.prompt_tokens} in, {usage.completion_tokens} out")
# Streaming chat completion: print each piece of text as it arrives and keep
# the token usage reported at the end of the stream.
stream = client.chat.completions.create(
    model="qwen-plus",
    messages=[
        {"role": "user", "content": "Explain serverless computing in 3 sentences."},
    ],
    stream=True,
    stream_options={"include_usage": True},
)

full_response = ""
usage = None
for chunk in stream:
    # The last chunk with include_usage=True contains token counts. That chunk
    # has an empty `choices` list, so it must be handled before indexing
    # choices[0]; otherwise the loop ends with an IndexError.
    chunk_usage = getattr(chunk, "usage", None)
    if chunk_usage is not None:
        usage = chunk_usage
    if not chunk.choices:
        continue

    delta = chunk.choices[0].delta.content
    if delta:
        full_response += delta
        print(delta, end="", flush=True)

if usage is not None:
    print(f"\nTokens used: {usage.prompt_tokens} in, {usage.completion_tokens} out")
# This works
# Thinking mode is requested through extra_body on a streaming call; the
# reasoning text and the final answer are accumulated separately.
stream = client.chat.completions.create(
    model="qwen3-max",
    messages=[{"role": "user", "content": "What is 127 * 389?"}],
    stream=True,
    extra_body={"enable_thinking": True},
)

reasoning = ""
answer = ""
for chunk in stream:
    delta = chunk.choices[0].delta

    # Thinking tokens come first, then the answer
    # The delta object may not have a reasoning_content attribute at all,
    # hence the defaulted lookup.
    thinking_piece = getattr(delta, "reasoning_content", None)
    if thinking_piece:
        reasoning += thinking_piece

    if delta.content:
        answer += delta.content
1
2
3
4
5
6
7
# This FAILS with a 400 error
# Counter-example kept on purpose: thinking is enabled on a call that does
# not request streaming.
question = [{"role": "user", "content": "What is 127 * 389?"}]
response = client.chat.completions.create(
    model="qwen3-max",
    messages=question,
    extra_body={"enable_thinking": True},
    # Missing stream=True!
)
import json

# JSON mode: the system prompt names the keys to return and response_format
# asks for a JSON object, so the reply can be parsed with json.loads.
extraction_messages = [
    {
        "role": "system",
        "content": "Extract product attributes. Return JSON with keys: name, category, price_range, target_audience.",
    },
    {
        "role": "user",
        "content": "The AirPods Max are premium over-ear headphones by Apple, retailing at $549, aimed at audiophiles and professionals.",
    },
]

response = client.chat.completions.create(
    model="qwen-plus",
    messages=extraction_messages,
    response_format={"type": "json_object"},
)

raw_reply = response.choices[0].message.content
data = json.loads(raw_reply)
# {"name": "AirPods Max", "category": "headphones", "price_range": "premium", "target_audience": "audiophiles and professionals"}
# Function calling: describe one callable tool with a JSON-schema parameter
# block and let the model decide whether to call it.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "City name, e.g. 'Shanghai'",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "What is the weather like in Beijing today?"}],
    tools=tools,
    tool_choice="auto",
)

message = response.choices[0].message
if message.tool_calls:
    # The model returns a tool_call instead of a text response
    tool_call = message.tool_calls[0]
    print(f"Function: {tool_call.function.name}")
    print(f"Arguments: {tool_call.function.arguments}")
    # Function: get_weather
    # Arguments: {"city": "Beijing", "unit": "celsius"}
else:
    # With tool_choice="auto" the model may answer in plain text; tool_calls
    # is then empty/None and indexing it would raise.
    tool_call = None
    print(message.content)
# Conversation history shared by every call to chat(); it starts with the
# system prompt only.
messages = [
    {"role": "system", "content": "You are a cloud architecture advisor."},
]


def chat(user_input: str) -> str:
    """Send one user turn together with the full history and return the reply.

    The user message and the assistant's reply are both appended to the
    module-level ``messages`` list, so each later call sees earlier turns.
    """
    messages.append(dict(role="user", content=user_input))

    response = client.chat.completions.create(
        model="qwen-plus",
        messages=messages,
        temperature=0.7,
    )

    reply = response.choices[0].message.content
    messages.append(dict(role="assistant", content=reply))
    return reply


for prompt in (
    # Turn 1
    "I need to host a Python API with about 200 req/hour.",
    # Turn 2 -- the model remembers the context
    "Would serverless be cheaper than ECS for that?",
    # Turn 3
    "What about cold starts?",
):
    print(chat(prompt))
务必监控 token 消耗。每轮对话都会将完整历史作为输入发送。对于长对话，建议实现滑动窗口或摘要策略。我通常限制在 20 轮以内，一旦超限，便将前 15 轮摘要为一条 system 消息。
# Small corpus of documents, embedded with a single batch request.
texts = [
    "ECS is Alibaba Cloud's virtual machine service.",
    "OSS provides object storage similar to AWS S3.",
    "Function Compute is a serverless execution engine.",
    "PolarDB is a cloud-native distributed database.",
    "DashScope is the API service for Qwen models.",
]

response = client.embeddings.create(model="text-embedding-v3", input=texts)

# One embedding per entry of response.data.
vectors = [entry.embedding for entry in response.data]

embedded_count = len(vectors)
dimensions = len(vectors[0])
print(f"Embedded {embedded_count} texts, each {dimensions} dimensions")
import numpy as np


def cosine_similarity(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    Computed as the dot product divided by the product of the two Euclidean
    norms.
    """
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


# Embed the query
query = "How do I attach a disk to a virtual machine?"
query_response = client.embeddings.create(
    model="text-embedding-v3",
    input=query,
)
query_vector = query_response.data[0].embedding

# Compare against our document vectors
# texts and vectors are paired by position, so they are iterated in lockstep
# instead of through an index.
similarities = [
    (text, cosine_similarity(query_vector, vector))
    for text, vector in zip(texts, vectors)
]
similarities.sort(key=lambda pair: pair[1], reverse=True)

# Show the three best matches, score first. The separator between score and
# text matches the sample output below.
for text, score in similarities[:3]:
    print(f" {score:.4f} {text}")
# 0.8234 ECS is Alibaba Cloud's virtual machine service.
# 0.6891 OSS provides object storage similar to AWS S3.
# ...
import os
import time

import requests

API_KEY = os.environ["DASHSCOPE_API_KEY"]

# Headers for task submission; X-DashScope-Async asks for asynchronous handling.
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
    "X-DashScope-Async": "enable",
}


def create_video_task(
    prompt: str,
    size: str = "1280*720",
    duration: int = 5,
    *,
    timeout: float = 30,
) -> str:
    """Submit a text-to-video generation task. Returns task_id.

    Args:
        prompt: Text description of the video to generate.
        size: Value sent as the ``size`` parameter.
        duration: Value sent as the ``duration`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis"
    payload = {
        "model": "wan2.5-t2v-plus",
        "input": {"prompt": prompt},
        "parameters": {"size": size, "duration": duration},
    }
    resp = requests.post(url, json=payload, headers=HEADERS, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["output"]["task_id"]


def poll_task(task_id: str, max_wait: int = 600, *, timeout: float = 30) -> dict:
    """Poll until task completes. Returns the full output dict.

    Args:
        task_id: Identifier returned when the task was submitted.
        max_wait: Maximum wall-clock seconds to keep polling.
        timeout: Seconds to wait for each individual HTTP request.

    Raises:
        requests.HTTPError: If a status request answers with an error status.
        RuntimeError: If the task ends in a terminal state other than success.
        TimeoutError: If the task has not completed within ``max_wait`` seconds.
    """
    url = f"https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}"
    headers = {"Authorization": f"Bearer {API_KEY}"}

    # A monotonic deadline counts request time as well as sleep time, so
    # max_wait is a real wall-clock bound.
    deadline = time.monotonic() + max_wait
    interval = 5
    while time.monotonic() < deadline:
        resp = requests.get(url, headers=headers, timeout=timeout)
        # Surface HTTP errors directly instead of as a KeyError on "output".
        resp.raise_for_status()
        output = resp.json()["output"]
        status = output["task_status"]

        if status == "SUCCEEDED":
            return output
        if status == "FAILED":
            raise RuntimeError(f"Task failed: {output.get('message', 'unknown')}")
        if status in ("CANCELED", "UNKNOWN"):
            # These are terminal too; polling further would only run into the timeout.
            raise RuntimeError(
                f"Task ended with status {status}: {output.get('message', 'unknown')}"
            )

        time.sleep(interval)
        interval = min(interval * 1.5, 30)  # Exponential backoff, cap at 30s

    raise TimeoutError(f"Task {task_id} did not complete within {max_wait}s")


def download_video(video_url: str, output_path: str, *, timeout: float = 30):
    """Download the video before the 24-hour expiry.

    Args:
        video_url: URL of the generated video.
        output_path: Local file path to write the video to.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    # The context manager releases the streamed connection even on errors.
    with requests.get(video_url, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()
        with open(output_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)


# Usage
task_id = create_video_task(
    prompt="A drone shot flying over Shanghai's Pudong skyline at sunset, cinematic, 4K quality",
    size="1280*720",
    duration=5,
)
print(f"Task submitted: {task_id}")

output = poll_task(task_id)
video_url = output["video_url"]
print(f"Video ready: {video_url}")

download_video(video_url, "shanghai_sunset.mp4")
print("Downloaded to shanghai_sunset.mp4")
def create_i2v_task(
    prompt: str,
    image_url: str,
    duration: int = 5,
    *,
    timeout: float = 30,
) -> str:
    """Image-to-video: animate a starting frame.

    Args:
        prompt: Text description sent with the request.
        image_url: URL of the image, sent as ``img_url``.
        duration: Value sent as the ``duration`` parameter.
        timeout: Seconds to wait for the HTTP request; without it the call
            could block indefinitely on a stalled connection.

    Returns:
        The ``task_id`` of the submitted task.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis"
    payload = {
        "model": "wan2.5-i2v-plus",
        "input": {
            "prompt": prompt,
            "img_url": image_url,
        },
        "parameters": {"duration": duration},
    }
    resp = requests.post(url, json=payload, headers=HEADERS, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["output"]["task_id"]
def create_image_task(
    prompt: str,
    size: str = "1024*1024",
    *,
    timeout: float = 30,
) -> str:
    """Submit a text-to-image generation task.

    Args:
        prompt: Text description of the image to generate.
        size: Value sent as the ``size`` parameter.
        timeout: Seconds to wait for the HTTP request; without it the call
            could block indefinitely on a stalled connection.

    Returns:
        The ``task_id`` of the submitted task.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text2image/image-synthesis"
    payload = {
        "model": "wanx2.1-t2i-plus",
        "input": {"prompt": prompt},
        # A single image is requested per task.
        "parameters": {"size": size, "n": 1},
    }
    resp = requests.post(url, json=payload, headers=HEADERS, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["output"]["task_id"]
import os

import dashscope
from dashscope.audio.tts_v2 import SpeechSynthesizer

# The SDK reads the key from this module-level attribute.
dashscope.api_key = os.environ["DASHSCOPE_API_KEY"]

# One-shot synthesis: the whole narration is synthesized in a single call and
# the result is written to disk.
narration = "Welcome to the product demo. Today we will show you three new features."

synth = SpeechSynthesizer(model="qwen3-tts-flash", voice="Cherry")
audio_bytes = synth.call(narration)

with open("demo_narration.mp3", "wb") as audio_file:
    audio_file.write(audio_bytes)
import dashscope
from dashscope.audio.tts_v2 import SpeechSynthesizer

# NOTE: this snippet does not import `os`; it is expected to be in scope already.
dashscope.api_key = os.environ["DASHSCOPE_API_KEY"]

synth = SpeechSynthesizer(
    model="qwen3-tts-flash",
    voice="Ethan",
    format="mp3",
    sample_rate=24000,
)

# Streaming callback
chunks = []


def on_audio(data):
    """Store one piece of audio handed to the callback, in arrival order."""
    chunks.append(data)


long_text = (
    "This is a longer piece of text that will be synthesized incrementally. "
    "Each chunk of audio is delivered as soon as it is ready, "
    "reducing time-to-first-audio for the user."
)
synth.streaming_call(text=long_text, callback=on_audio)

# Write the collected pieces back-to-back into a single file.
with open("streamed_output.mp3", "wb") as f:
    f.writelines(chunks)