In [3]:
import re
def split_long_text(text, max_length=100):
    """Split ``text`` into chunks of at most ``max_length`` characters.

    The text is first cut after every Chinese full stop ("。") or ASCII
    period ("."), keeping the terminator attached to its sentence.
    Consecutive sentences are then packed greedily into a chunk as long as
    the chunk stays within ``max_length``.  A single sentence that is longer
    than ``max_length`` is hard-wrapped at the limit so that no returned
    chunk ever exceeds it.

    Args:
        text: The string to split.
        max_length: Maximum number of characters per chunk; must be >= 1.

    Returns:
        A list of non-empty strings whose concatenation equals ``text``.
        An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    # Zero-width lookbehind: split *after* each terminator without consuming it.
    sentences = re.split(r'(?<=[。\.])', text)
    chunks = []
    buffer = ""

    for sentence in sentences:
        if len(buffer) + len(sentence) <= max_length:
            buffer += sentence
            continue

        # The sentence does not fit: flush what has been collected so far.
        if buffer:
            chunks.append(buffer)

        # A sentence that alone exceeds the limit is cut into fixed-size
        # pieces; the (non-empty) remainder starts the next chunk.
        while len(sentence) > max_length:
            chunks.append(sentence[:max_length])
            sentence = sentence[max_length:]
        buffer = sentence

    # Flush the trailing chunk.
    if buffer:
        chunks.append(buffer)

    return chunks

# Characters removed by clean_no_need_text.  Compiled once at import time.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Miscellaneous symbols & pictographs
    "\U0001F680-\U0001F6FF"  # Transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # Regional indicator symbols (flags)
    "\U0001F900-\U0001F9FF"  # Supplemental symbols & pictographs
    "\U0001FA70-\U0001FAFF"  # Symbols & pictographs extended-A
    "\U00002500-\U00002BEF"  # Box drawing through misc. symbols/arrows (includes Dingbats)
    "\U0000FE0F"             # Variation selector-16 left behind by "❤️"-style emoji
    "\U0000200D"             # Zero-width joiner left behind by composite emoji
    "]+",
    flags=re.UNICODE,
)


def clean_no_need_text(text):
    """Remove newlines, emoji and decorative symbols from ``text``.

    Newline characters are replaced by a single space, every character
    matched by ``_EMOJI_PATTERN`` is deleted, and leading/trailing whitespace
    is stripped *after* the deletion so that an emoji at either end of the
    text does not leave stray spaces behind.

    Args:
        text: Raw text, e.g. an LLM response.

    Returns:
        The cleaned string.  Interior runs of spaces are left untouched.
    """
    without_newlines = text.replace('\n', ' ')
    return _EMOJI_PATTERN.sub('', without_newlines).strip()

def normalize_text(text):
    """Normalise punctuation and whitespace so the text is easier to split.

    Steps, in order:
      1. Sentence-ending punctuation is mapped to a full stop of the same
         script ("…？！；" -> "。", "?!;" -> ".").
      2. Pause-like punctuation is mapped to a comma of the same script
         ("：、" -> "，", ":" -> ",").
      3. Brackets, quotes and assorted symbols are deleted.
      4. Runs of the same full stop or comma (Chinese *and* ASCII) are
         collapsed to a single character.
      5. Every run of whitespace is collapsed to one space and the result is
         stripped.  This happens last because step 3 can leave two spaces
         side by side (e.g. "a - b").

    Args:
        text: The string to normalise.

    Returns:
        The normalised string.
    """
    # 1. Sentence terminators -> full stop.
    text = re.sub(r'[…？！；]', '。', text)
    text = re.sub(r'[?!;]', '.', text)

    # 2. Pause marks -> comma.
    text = re.sub(r'[：、]', '，', text)
    text = re.sub(r'[:]', ',', text)

    # 3. Delete special characters (Chinese set, then ASCII set).
    text = re.sub(r'[《》【】（）“”’‘——￥]', '', text)
    text = re.sub(r'[<>\"\'\[\]\{\}\(\)\_\-\—+=*&%$#@]', '', text)

    # 4. Collapse repeated full stops / commas.
    text = re.sub(r'。+', '。', text)
    text = re.sub(r'，+', '，', text)
    text = re.sub(r'\.+', '.', text)
    text = re.sub(r',+', ',', text)

    # 5. Collapse whitespace (including newlines) and trim.
    text = re.sub(r'\s+', ' ', text).strip()

    return text

# Global PyTorch configuration followed by loading the ChatTTS model and the
# speaker embedding.  Everything below runs once per kernel and mutates
# process-wide torch settings.
import torch
# TorchDynamo settings: cache size limit of 64 and suppress_errors enabled.
# NOTE(review): suppress_errors presumably makes compile failures fall back
# to eager execution instead of raising — confirm against the PyTorch docs.
torch._dynamo.config.cache_size_limit = 64
torch._dynamo.config.suppress_errors = True
# Select the 'high' float32 matmul precision mode.
torch.set_float32_matmul_precision('high')
# cuDNN is switched off globally for this process.
torch.backends.cudnn.enabled = False

import ChatTTS
# NOTE(review): Audio is not used in the cells visible here.
from IPython.display import Audio

# Instantiate ChatTTS and load its models (the captured log below this cell
# shows vocos, dvae, gpt, decoder and tokenizer being loaded).
chat = ChatTTS.Chat()
chat.load_models()

# Speaker embedding read from a local file.
# NOTE(review): torch.load unpickles the file — only load embeddings that come
# from a trusted source.
spk = torch.load("seed_2155_restored_emb.pt")

# Decoding parameters passed to chat.infer as params_infer_code.
params_infer_code = {
  'spk_emb': spk, # add sampled speaker
  'temperature': .3, # using custom temperature
  'top_P': 0.7, # top P decode
  'top_K': 20, # top K decode
}

###################################
# For sentence level manual control.

# use oral_(0-9), laugh_(0-2), break_(0-7)
# to generate special token in text to synthesize.
params_refine_text = {
  'prompt': '[oral_2][laugh_0][break_6]'
}

INFO:ChatTTS.core:Load from cache: /home/ding/.cache/huggingface/hub/models--2Noise--ChatTTS/snapshots/c0aa9139945a4d7bb1c84f07785db576f2bb1bfa
INFO:ChatTTS.core:use cuda:0
INFO:ChatTTS.core:vocos loaded.
INFO:ChatTTS.core:dvae loaded.
INFO:ChatTTS.core:gpt loaded.
INFO:ChatTTS.core:decoder loaded.
INFO:ChatTTS.core:tokenizer loaded.
INFO:ChatTTS.core:All initialized.


In [8]:
import requests
import json

# API URL
url = "http://xxxxxxxxx/api/generate"

# Request payload
payload = {
    "model": "llama3",
    "prompt": "如果希望你推荐给我一般关于投资理财的书籍，那会是哪一本，为什么?另外请用中文回答我",
    "stream": False
}

# (connect, read) timeouts in seconds.  Without a timeout requests can block
# forever; the read timeout is generous because the model answers in one piece
# ("stream": False).
REQUEST_TIMEOUT = (10, 300)

# Always start from an empty list so the synthesis cell never sees a stale
# value from an earlier run, and so `texts` has one consistent type.
texts = []

# Send the POST request; network failures are reported instead of aborting the
# cell, leaving `texts` empty.
try:
    response = requests.post(
        url,
        data=json.dumps(payload),
        headers={"Content-Type": "application/json"},
        timeout=REQUEST_TIMEOUT,
    )
except requests.RequestException as exc:
    response = None
    print(f"Request failed: {exc}")

if response is not None:
    if response.status_code == 200:
        raw_text = response.json()['response']
        # clean -> normalize -> split into chunks of at most 100 characters
        texts = split_long_text(normalize_text(clean_no_need_text(raw_text)), 100)
    else:
        print(f"Request failed with status code: {response.status_code}, Response text: {response.text}")
print(texts)

['如果你想了解投资理财，我强烈建议你阅读「A Random Walk Down Wall Street」wall street。这本书是投资大师 Burton G. Malkiel Jr.', ' 的经典著作，于1973年首次出版，并且已经成为投资领域的经典书籍。 为什么我会推荐这本书。理由如下， 1. 基础知识，这本书提供了投资理财的基本概念和原则，帮助读者了解投资市场和金融产品。 2.', ' 随机walk理论，Malkiel 在书中提出了随机 walk理论，这个理论认为股票市场的价格变化是随机的，而不是可以预测的。这本书将随机 walk理论作为核心概念，帮助读者理解市场的性质。 3.', ' 投资策略，A Random Walk Down Wall Street还讨论了各种投资策略，包括指数基金，股票投资，债券投资等。这些策略可以应用于不同投资目标和风险承担能力的人。 4.', ' 历史经验，书中还包含了历史事件和案例，帮助读者了解投资市场的发展和变化。', ' 总之，A Random Walk Down Wall Street是投资理财领域的一本经典书籍，提供了基础知识，随机 walk理论和投资策略等信息。', '阅读这本书可以帮助你建立投资观念和策略，并且为你的投资之路做好铺垫。 P.S. 这本书的中文翻译版已经出版，可以在 Amazon 等平台上购买。']


In [9]:
import torchaudio

# Synthesize every text chunk, join the audio and write it to output.wav.
if texts:
    # NOTE(review): skip_refine_text=True is passed together with
    # params_refine_text; the refine parameters presumably have no effect in
    # that case — confirm against the ChatTTS API.
    wavs = chat.infer(
        texts,
        skip_refine_text=True,
        params_refine_text=params_refine_text,
        params_infer_code=params_infer_code,
    )
    # One tensor per chunk.
    waveforms = [torch.from_numpy(wav) for wav in wavs]
    # Concatenate along dim 1 — assumes each wav is (channels, samples); TODO confirm.
    merged_waveform = torch.cat(waveforms, dim=1)

    # 24000 is the sample rate written to the WAV header.
    torchaudio.save("output.wav", merged_waveform, 24000, format="wav")
else:
    print("No response")

INFO:ChatTTS.core:All initialized.
 37%|███████████████████████████████████▉                                                              | 752/2048 [00:03<00:06, 205.68it/s]
