In [1]:
# # 0.5.11 的老版本 -- 支持python3.6以下
# !pip install pyecharts==0.5.11 --user
# 1.0 以上的新版本 -- 支持python3.6以上 ,不向下兼容,使用时注意,否则会运行失败
# 安装 v1 以上版本

#向阳测试结论: 可以作为小市值逃顶的反向指标,拥挤度越低,小市值交易越接近顶部区域


#尝试写了一个A股拥挤率指标的研究,就是计算A股成交额排名前5%的股票合计成交额占比全市场成交额的比值,超过50%时,算比较拥挤,适当减仓。
#虽然之前已经有朋友实现过了,但是应楼主的请求,我这边做了一个改版,速度应该有所加快(主要是取数逻辑,单个取数,会很慢,一次性取大量数据,内存不够,所以是分片去取的)
#另外增加了一个功能,就是对比上证指数,看起来还比较清晰明了
#最后说说我的一个结论吧,我感觉这个指标还是有一定的意义的,18年初和21年初这个指标都到达了50%以上,反映了大盘股(我的理解,小盘股很难有太大的成交)抱团现象比较严重。
#正如宽友@周新一 所说:拥挤率指标可以作为大盘择时的一个因子,人太多的地方也是风险最多的地方,真正聪明的资金都是提前嗅到血腥味,提前撤退,别人恐惧我贪婪,别人贪婪我恐惧!
#不过我有两个问题,大家也可以一起讨论
#1.这个指标可以预测上极值逃顶,为什么不能预测下极值抄底呢,至少我感觉图中看不出来
#2.大于50就是顶吗?也许他是在上升的半山腰呢?我这么理解也不知道对不对



!pip install pyecharts -U --user
!pip install snapshot_selenium --user
WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip.
Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Requirement already satisfied: pyecharts in ./.local/lib/python3.6/site-packages (2.0.8)
Requirement already satisfied: jinja2 in /opt/conda/lib/python3.6/site-packages (from pyecharts) (2.10)
Requirement already satisfied: simplejson in /opt/conda/lib/python3.6/site-packages (from pyecharts) (3.16.0)
Requirement already satisfied: prettytable in /opt/conda/lib/python3.6/site-packages (from pyecharts) (0.7.2)
Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.6/site-packages (from jinja2->pyecharts) (1.1.0)
WARNING: You are using pip version 21.3; however, version 21.3.1 is available.
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.
WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip.
Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Requirement already satisfied: snapshot_selenium in ./.local/lib/python3.6/site-packages (0.0.2)
Requirement already satisfied: selenium in /opt/conda/lib/python3.6/site-packages (from snapshot_selenium) (3.141.0)
Requirement already satisfied: urllib3 in /opt/conda/lib/python3.6/site-packages (from selenium->snapshot_selenium) (1.24.1)
WARNING: You are using pip version 21.3; however, version 21.3.1 is available.
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.
In [2]:
# Suppress the red warning boxes in the notebook output.
import warnings
warnings.filterwarnings("ignore")
In [3]:
# from jqdata import *
# import datetime
# import pandas as pd

# # date_now的过去180天
# date_now = datetime.date(2021, 3, 21)
# days = 600

# dict_crowd = {}
# trade_days = get_trade_days(end_date=date_now, count=days)
# for date1 in trade_days:
#     all_stocks = list(get_all_securities(date=date1).index)
#     h = get_price(all_stocks, end_date=date1, frequency='1d', fields='money',
#                   count=1, panel=False).sort_values(by='money', ascending=False)
#     #
#     n_five_pct = int(len(h) / 20)   # 5%
#     n_crowd = h.iloc[:n_five_pct]['money'].sum() / h['money'].sum()
#     dict_crowd[date1] = n_crowd * 100
# #
# df_crowd = pd.DataFrame.from_dict(dict_crowd, orient='index',columns=['crowd_rate',])
# df_crowd.plot()
In [4]:
from jqdata import *
import datetime
import pandas as pd

# 画图相关的包
from pyecharts.charts import Bar
from pyecharts.charts import Line
from pyecharts import options as opts

from pyecharts.render import make_snapshot
# 使用 snapshot-selenium 渲染图片
from snapshot_selenium import snapshot

def crowd_line(daycount, date_now=None, piece_count=100):
    """Build the pyecharts Line chart of the A-share "crowding rate".

    Parameters
    ----------
    daycount : int
        Number of past trade days to include.
    date_now : str or None
        End date as 'YYYY-MM-DD'; defaults to today's date, resolved at call
        time (see bug-fix note below).
    piece_count : int
        Trade days fetched per slice, to keep memory use bounded.

    Returns
    -------
    pyecharts Line chart (crowding rate vs. scaled index close).
    """
    # BUG FIX: the original default `datetime.datetime.now().strftime(...)` was
    # evaluated once at function-definition time, so a long-running kernel kept
    # using a stale date on later calls. Resolve "today" at call time instead.
    if date_now is None:
        date_now = datetime.datetime.now().strftime('%Y-%m-%d')

    all_df_crowd = get_all_crowd_rate_df(daycount, date_now, piece_count)
    time_list = list(all_df_crowd.index)

    # NOTE(review): despite the `hs300_` names, '000001.XSHG' is the Shanghai
    # Composite, not the CSI 300. Close prices are divided by 100 so they share
    # a readable axis with the percentage crowding rate.
    hs300_df = get_price('000001.XSHG', start_date=time_list[0], end_date=time_list[-1],
                         frequency='1d', fields='close', panel=False)
    hs300_list = [round(x / 100, 2) for x in hs300_df['close']]

    crowd_rate_list = all_df_crowd['crowd_rate']
    return produce_line(time_list, hs300_list, crowd_rate_list)
    
def produce_line(time_list, hs300_list, crowd_rate_list):
    """Assemble the chart: crowding rate and scaled SSE close on one x-axis.

    Parameters
    ----------
    time_list : list of x-axis category labels (trade dates).
    hs300_list : list of index close prices already divided by 100.
    crowd_rate_list : per-day crowding-rate percentages.

    Returns
    -------
    A configured pyecharts Line chart (caller renders it).
    """

    def _extrema_marks():
        # Fresh MarkPointOpts per series (max/min markers); a factory avoids
        # repeating the literal and avoids sharing one option object.
        return opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ]
        )

    def _average_line():
        # Horizontal mark-line at the series average.
        return opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        )

    chart = Line()
    chart.add_xaxis(xaxis_data=time_list)
    chart.add_yaxis(
        series_name="拥挤率",
        y_axis=crowd_rate_list,
        markpoint_opts=_extrema_marks(),
        markline_opts=_average_line(),
    )
    chart.add_yaxis(
        series_name="上证指数收盘价(已处理过,除以100)",
        y_axis=hs300_list,
        markpoint_opts=_extrema_marks(),
        markline_opts=_average_line(),
    )
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="成交前5%拥挤率"),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        datazoom_opts=[
            opts.DataZoomOpts(orient="horizontal"),
            opts.DataZoomOpts(type_="inside"),
        ],
        yaxis_opts=opts.AxisOpts(name='拥挤率', is_scale=True),
        xaxis_opts=opts.AxisOpts(type_="category", name='交易日'),
    )
    return chart

# 以下方法可以设置双y轴
# x = Faker.choose()
# scatter1 = (
#     Line()
#     .add_xaxis(x)
#     .add_yaxis("商家A", Faker.values(), yaxis_index=0,)
#     .extend_axis(yaxis=opts.AxisOpts())
# #     .set_global_opts(yaxis_opts=opts.AxisOpts(type_="value", name="商家A", position="right"))
# )
# scatter2 = (
#     Line()
#     .add_xaxis(x)
#     .add_yaxis("商家B", [v/1000 for v in Faker.values()], yaxis_index=1)
# #     .extend_axis(yaxis=opts.AxisOpts(type_="value", name="商家B", position="left"))
# #     .set_global_opts(yaxis_opts=opts.AxisOpts(type_="value", name="商家B", position="left"))
# )
# scatter1.overlap(scatter2)
# scatter1.render_notebook()


def get_all_crowd_rate_df(daycount, date_now, piece_count):
    """Compute the daily crowding rate over the last `daycount` trade days.

    Fetching prices for every stock over the whole window at once exhausts
    memory, so the trade-day range is split into slices of up to `piece_count`
    days, each fetched and reduced independently, then concatenated.

    Parameters
    ----------
    daycount : int — number of trade days ending at `date_now`.
    date_now : str — end date 'YYYY-MM-DD'.
    piece_count : int — slice size in trade days.

    Returns
    -------
    DataFrame indexed by date string with a 'crowd_rate' column.
    """
    # Number of slices = ceil(daycount / piece_count).
    piece_total = int((daycount - 1) / piece_count) + 1

    # NOTE(review): the stock universe is resolved once as of `date_now` and
    # reused for every historical slice, so listings/delistings inside the
    # window are not re-resolved per day (original behavior, kept).
    all_stocks = list(get_all_securities(date=date_now).index)

    trade_dates = get_trade_days(start_date=None, end_date=date_now, count=daycount)

    # Removed dead locals from the original (`last_date`,
    # `piece_trade_dates_str`, and an `all_df_crowd` frame that was never used).
    frames = []
    for i in range(piece_total):
        stop = min(piece_count * (i + 1), daycount)
        piece_trade_dates = trade_dates[piece_count * i: stop]
        df = get_all_df(piece_trade_dates[0], piece_trade_dates[-1], all_stocks)
        frames.append(get_crowd_rate_df(df))
    return pd.concat(frames)
        
        
def get_all_df(start_date, end_date, all_stocks):
    """Fetch daily turnover ('money') for `all_stocks` between the two dates.

    Returns a DataFrame indexed by trade date ('time'), sorted by day
    ascending and, within each day, by turnover descending — the shape
    `get_crowd_rate_df` expects.
    """
    # Removed the unused `dict_crowd` local and a stale commented-out line
    # from the original.
    all_df = get_price(all_stocks, start_date=start_date, end_date=end_date,
                       frequency='1d', fields='money', panel=False)
    all_df = all_df.sort_values(['time', 'money'], ascending=[True, False])
    return all_df.set_index('time', drop=True)
    
def get_crowd_rate_df(all_df):
    """Per trade day, compute the share of total turnover taken by the
    top-5% most-traded stocks (the "crowding rate").

    Parameters
    ----------
    all_df : DataFrame indexed by trade-day timestamps with a 'money' column.

    Returns
    -------
    DataFrame indexed by 'date' ('YYYY-MM-DD' strings, ascending) with one
    column 'crowd_rate' holding percentages rounded to one decimal.
    """
    records = []
    # groupby on the index replaces the original per-date boolean-mask scan,
    # which re-filtered the whole frame once per day (O(days * rows)).
    for day, day_df in all_df.groupby(level=0):
        money = day_df['money'].sort_values(ascending=False)
        total = money.sum()
        # Threshold = turnover of the stock at the 5% rank position; keeping
        # ties with `>=` matches the original selection exactly.
        threshold = money.iloc[int(len(money) * 0.05)]
        top_sum = money[money >= threshold].sum()
        records.append((day.strftime('%Y-%m-%d'), round(top_sum / total * 100, 1)))

    # Build the frame once from records instead of growing it row-by-row.
    df_crowd = pd.DataFrame(records, columns=['date', 'crowd_rate']).set_index('date')
    return df_crowd.sort_index()
print('over')
over
In [5]:
# Look-back window: number of past trade days to analyze. The other
# crowd_line parameters keep their defaults (end date = today, 100-day slices).
day_count = 365

crowd_chart = crowd_line(day_count)
crowd_chart.render_notebook()
Out[5]:
In [ ]:
 
In [ ]: