arshy committed on
Commit
0869b01
1 Parent(s): 8801d28

initial commit

.gitattributes CHANGED
@@ -16,6 +16,7 @@
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.csv filter=lfs diff=lfs merge=lfs -text
20
  *.pb filter=lfs diff=lfs merge=lfs -text
21
  *.pickle filter=lfs diff=lfs merge=lfs -text
22
  *.pkl filter=lfs diff=lfs merge=lfs -text
app copy.py ADDED
@@ -0,0 +1,135 @@
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ tools = pd.read_csv("./data/tools.csv")
5
+ # all_trades = pd.read_csv('./data/all_trades_profitability.csv')
6
+
7
+ demo = gr.Blocks()
8
+
9
+ INC_TOOLS = [
10
+ 'prediction-online',
11
+ 'prediction-offline',
12
+ 'claude-prediction-online',
13
+ 'claude-prediction-offline',
14
+ 'prediction-offline-sme',
15
+ 'prediction-online-sme',
16
+ 'prediction-request-rag',
17
+ 'prediction-request-reasoning',
18
+ 'prediction-url-cot-claude',
19
+ 'prediction-request-rag-claude',
20
+ 'prediction-request-reasoning-claude'
21
+ ]
22
+
23
+ def set_error(row):
24
+ if row.error not in [True, False]:
25
+ if not row.prompt_response:
26
+ return True
27
+ return False
28
+ return row.error
29
+
30
+ def get_error_data():
31
+ tools_inc = tools[tools['tool'].isin(INC_TOOLS)]
32
+ tools_inc['error'] = tools_inc.apply(set_error, axis=1)
33
+ error = tools_inc.groupby(['tool', 'request_month_year_week', 'error']).size().unstack().fillna(0).reset_index()
34
+ error['error_perc'] = (error[True] / (error[False] + error[True]))*100
35
+ error['total_requests'] = error[False] + error[True]
36
+
37
+ return error
38
+
39
+ def get_error_data_all(error):
40
+ error_total = error.groupby('request_month_year_week').agg({'total_requests': 'sum', False: 'sum', True:'sum'}).reset_index()
41
+ error_total['error_perc'] = (error_total[True] / error_total['total_requests'])*100
42
+ # convert column names to strings
43
+ error_total.columns = error_total.columns.astype(str)
44
+ # format all values to 4 decimal places for error_perc
45
+ error_total['error_perc'] = error_total['error_perc'].apply(lambda x: round(x, 4))
46
+ return error_total
47
+
48
+ error = get_error_data()
49
+ error_all = get_error_data_all(error)
50
+ print(error_all.head())
51
+
52
+ with demo:
53
+ gr.HTML("<h1>Olas Predict Actual Performance</h1>")
54
+ gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
55
+
56
+ with gr.Tabs():
57
+ with gr.TabItem("🔥 Error Dashboard"):
58
+ with gr.Row():
59
+ gr.Markdown("This plot shows the percentage of requests that resulted in an error.")
60
+ with gr.Row():
61
+ # plot
62
+ with gr.Column():
63
+ gr.LinePlot(
64
+ value=error_all,
65
+ x="request_month_year_week",
66
+ y="error_perc",
67
+ title="Error Percentage",
68
+ x_title="Week",
69
+ y_title="Error Percentage",
70
+ height=400,
71
+ show_label=True
72
+ )
73
+ gr.Markdown("This plot shows the percentage of requests that resulted in an error.")
74
+
75
+ # Dropdown for selecting the tool
76
+ sel_tool = gr.Dropdown(
77
+ value="prediction-online",
78
+ choices=INC_TOOLS,
79
+ label="Select a tool"
80
+ )
81
+ plot_tool_error = gr.LinePlot(
82
+ title="Error Percentage",
83
+ x_title="Week",
84
+ y_title="Error Percentage",
85
+ render=False
86
+ )
87
+
88
+ # Dropdown for selecting the week
89
+ sel_week = gr.Dropdown(
90
+ value=error['request_month_year_week'].iloc[-1],
91
+ choices=error['request_month_year_week'].unique().tolist(),
92
+ label="Select a week"
93
+ )
94
+ plot_week_error = gr.BarPlot(
95
+ title="Error Percentage",
96
+ x_title="Tool",
97
+ y_title="Error Percentage",
98
+ render=False
99
+ )
100
+
101
+ def update_tool_plot(selected_tool):
102
+ filtered_data = error[error['tool'] == selected_tool]
103
+ # convert column names to strings
104
+ filtered_data.columns = filtered_data.columns.astype(str)
105
+ # round error_perc to 4 decimal places
106
+ filtered_data['error_perc'] = filtered_data['error_perc'].apply(lambda x: round(x, 4))
107
+ print(filtered_data.head())
108
+ return {
109
+ "x": filtered_data['request_month_year_week'].tolist(),
110
+ "y": filtered_data['error_perc'].tolist(),
111
+ }
112
+
113
+ def update_week_plot(selected_week):
114
+ filtered_data = error[error['request_month_year_week'] == selected_week]
115
+ filtered_data.columns = filtered_data.columns.astype(str)
116
+ filtered_data['error_perc'] = filtered_data['error_perc'].apply(lambda x: round(x, 4))
117
+ print(filtered_data.head())
118
+ return {
119
+ "x": filtered_data['tool'].tolist(),
120
+ "y": filtered_data['error_perc'].tolist(),
121
+ }
122
+
123
+ sel_tool.change(fn=update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
124
+ sel_week.change(fn=update_week_plot, inputs=sel_week, outputs=plot_week_error)
125
+
126
+ with gr.Row():
127
+ plot_tool_error.render()
128
+ with gr.Row():
129
+ plot_week_error.render()
130
+
131
+ with gr.TabItem("ℹ️ About"):
132
+ with gr.Accordion("About the Benchmark", open=False):
133
+ gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
134
+
135
+ demo.queue(default_concurrency_limit=40).launch()
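The `set_error` helper above backfills the error flag: when a row's `error` is neither True nor False, the row counts as an error exactly when it has no `prompt_response`. A tiny illustration of that rule on invented rows (not data from the repo):

```python
import numpy as np
import pandas as pd

def set_error(row):
    # same rule as above: backfill a missing error flag from prompt_response
    if row.error not in [True, False]:
        if not row.prompt_response:
            return True
        return False
    return row.error

rows = pd.DataFrame({
    "error": [True, False, np.nan, np.nan],
    "prompt_response": ["...", "...", None, "..."],
})
print(rows.apply(set_error, axis=1).tolist())  # [True, False, True, False]
```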
app.py ADDED
@@ -0,0 +1,163 @@
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ tools = pd.read_csv("./data/tools.csv")
5
+
6
+ demo = gr.Blocks()
7
+
8
+ INC_TOOLS = [
9
+ 'prediction-online',
10
+ 'prediction-offline',
11
+ 'claude-prediction-online',
12
+ 'claude-prediction-offline',
13
+ 'prediction-offline-sme',
14
+ 'prediction-online-sme',
15
+ 'prediction-request-rag',
16
+ 'prediction-request-reasoning',
17
+ 'prediction-url-cot-claude',
18
+ 'prediction-request-rag-claude',
19
+ 'prediction-request-reasoning-claude'
20
+ ]
21
+
22
+ def set_error(row):
23
+ if row.error not in [True, False]:
24
+ if not row.prompt_response:
25
+ return True
26
+ return False
27
+ return row.error
28
+
29
+ def get_error_data():
30
+ tools_inc = tools[tools['tool'].isin(INC_TOOLS)]
31
+ tools_inc['error'] = tools_inc.apply(set_error, axis=1)
32
+ error = tools_inc.groupby(['tool', 'request_month_year_week', 'error']).size().unstack().fillna(0).reset_index()
33
+ error['error_perc'] = (error[True] / (error[False] + error[True])) * 100
34
+ error['total_requests'] = error[False] + error[True]
35
+ return error
36
+
37
+ def get_error_data_all(error):
38
+ error_total = error.groupby('request_month_year_week').agg({'total_requests': 'sum', False: 'sum', True: 'sum'}).reset_index()
39
+ error_total['error_perc'] = (error_total[True] / error_total['total_requests']) * 100
40
+ error_total.columns = error_total.columns.astype(str)
41
+ error_total['error_perc'] = error_total['error_perc'].apply(lambda x: round(x, 4))
42
+ return error_total
43
+
44
+ error = get_error_data()
45
+ error_all = get_error_data_all(error)
46
+
47
+ with demo:
48
+ gr.HTML("<h1>Olas Predict Actual Performance</h1>")
49
+ gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
50
+
51
+ with gr.Tabs():
52
+ with gr.TabItem("🔥 Error Dashboard"):
53
+ with gr.Row():
54
+ gr.Markdown("# Plot showing overall error")
55
+ with gr.Row():
56
+ # plot
57
+ with gr.Column():
58
+ gr.BarPlot(
59
+ value=error_all,
60
+ x="request_month_year_week",
61
+ y="error_perc",
62
+ title="Error Percentage",
63
+ x_title="Week",
64
+ y_title="Error Percentage",
65
+ height=800,
66
+ show_label=True,
67
+ interactive=True,
68
+ show_actions_button=True,
69
+ tooltip=["request_month_year_week", "error_perc"]
70
+ )
71
+ with gr.Row():
72
+ gr.Markdown("# Plot showing error by tool")
73
+
74
+ with gr.Row():
75
+ sel_tool = gr.Dropdown(label="Select a tool", choices=INC_TOOLS, value=INC_TOOLS[0])
76
+
77
+ with gr.Row():
78
+ plot_tool_error = gr.BarPlot(
79
+ title="Error Percentage",
80
+ x_title="Week",
81
+ y_title="Error Percentage",
82
+ show_label=True,
83
+ interactive=True,
84
+ show_actions_button=True,
85
+ tooltip=["request_month_year_week", "error_perc"],
86
+ width=800
87
+ )
88
+
89
+ with gr.Row():
90
+ gr.Markdown("# Plot showing error by week")
91
+
92
+ with gr.Row():
93
+ choices = error['request_month_year_week'].unique().tolist()
94
+ # sort the choices chronologically; the latest week (last entry) is used as the default
95
+ choices = sorted(choices)
96
+ sel_week = gr.Dropdown(
97
+ label="Select a week",
98
+ choices=choices,
99
+ value=choices[-1]
100
+ )
101
+
102
+ with gr.Row():
103
+ plot_week_error = gr.BarPlot(
104
+ title="Error Percentage",
105
+ x_title="Tool",
106
+ y_title="Error Percentage",
107
+ show_label=True,
108
+ interactive=True,
109
+ show_actions_button=True,
110
+ tooltip=["tool", "error_perc"],
111
+ width=800
112
+ )
113
+
114
+
115
+ def update_tool_plot(selected_tool):
116
+ filtered_data = error[error['tool'] == selected_tool]
117
+ # convert column names to strings
118
+ filtered_data.columns = filtered_data.columns.astype(str)
119
+ # round error_perc to 4 decimal places
120
+ filtered_data['error_perc'] = filtered_data['error_perc'].apply(lambda x: round(x, 4))
121
+ update = gr.LinePlot(
122
+ title="Error Percentage",
123
+ x_title="Week",
124
+ y_title="Error Percentage",
125
+ x="request_month_year_week",
126
+ y="error_perc",
127
+ value=filtered_data
128
+ )
129
+ return update
130
+
131
+ def update_week_plot(selected_week):
132
+ filtered_data = error[error['request_month_year_week'] == selected_week]
133
+ # convert column names to strings
134
+ filtered_data.columns = filtered_data.columns.astype(str)
135
+ # round error_perc to 4 decimal places
136
+ filtered_data['error_perc'] = filtered_data['error_perc'].apply(lambda x: round(x, 4))
137
+ update = gr.BarPlot(
138
+ title="Error Percentage",
139
+ x_title="Tool",
140
+ y_title="Error Percentage",
141
+ x="tool",
142
+ y="error_perc",
143
+ value=filtered_data
144
+ )
145
+ return update
146
+
147
+ sel_tool.change(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
148
+ sel_week.change(update_week_plot, inputs=sel_week, outputs=plot_week_error)
149
+
150
+ with gr.Row():
151
+ sel_tool
152
+ with gr.Row():
153
+ plot_tool_error
154
+ with gr.Row():
155
+ sel_week
156
+ with gr.Row():
157
+ plot_week_error
158
+
159
+ with gr.TabItem("ℹ️ About"):
160
+ with gr.Accordion("About the Benchmark"):
161
+ gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
162
+
163
+ demo.queue(default_concurrency_limit=40).launch()
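A note on the aggregation in `get_error_data` above: requests are counted per (tool, week, error) group, the boolean `error` level is unstacked into `False`/`True` columns, and the weekly error percentage is computed from those counts. A minimal sketch of the same pattern on invented data (column names follow the app; the values are made up):

```python
import pandas as pd

# toy stand-in for data/tools.csv with only the columns the aggregation needs
tools = pd.DataFrame({
    "tool": ["prediction-online"] * 4 + ["prediction-offline"] * 2,
    "request_month_year_week": ["2024-01", "2024-01", "2024-02", "2024-02", "2024-01", "2024-02"],
    "error": [True, False, False, False, True, False],
})

# count requests per (tool, week, error) and spread the error flag into False/True columns
error = (
    tools.groupby(["tool", "request_month_year_week", "error"])
    .size()
    .unstack()
    .fillna(0)
    .reset_index()
)
error["error_perc"] = error[True] / (error[False] + error[True]) * 100
error["total_requests"] = error[False] + error[True]
print(error)
```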
data/all_trades_profitability.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28ee508150a1cba56c9439d0cbfcf4871cb9f32f0792eb1d4dd7bca95af1e903
3
+ size 28328169
data/delivers.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dafbbf73918de11435cbeaee7196ab0f37a18b06656a0c5325b1fa86be98b2c
3
+ size 1121772123
data/fpmmTrades.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a16a49dac94891d4438ea4eba6a52d6ef00f2985bbcc0e41daeb6f8557f5536
3
+ size 62639698
data/fpmms.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7933c45ab45cf377b55dbdc49f413ede81a7582cd843717c70cdd71f8fa7b74
3
+ size 391125
data/requests.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d06d62c1fe5dd50fe5c7e3066413e843eb536cc51f08325fd85570b8255007
3
+ size 124945839
data/summary_profitability.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4769b2c800f4a3c655de8a5673070c5be00ce5733798cc9a745cc5df2f961a6
3
+ size 46612
data/tools.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4045250b8b4ec74ca3d37ce94208665c1ea09042b6681106615f0773ce46aee0
3
+ size 1211219315
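The `data/*.csv` entries above are Git LFS pointer files rather than the datasets themselves: each stores the pointer-spec version, the SHA-256 object id, and the size in bytes of the real file, which `git lfs` fetches on checkout (hence the `*.csv filter=lfs` rule added to `.gitattributes`). A minimal sketch of reading those three fields; the `parse_lfs_pointer` helper is illustrative, not part of this repo:

```python
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Split each 'key value' line of a Git LFS pointer file into a dict (illustrative helper)."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

# e.g. {'version': 'https://git-lfs.github.com/spec/v1', 'oid': 'sha256:...', 'size': '1211219315'}
print(parse_lfs_pointer("data/tools.csv"))
```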
requirements.txt ADDED
@@ -0,0 +1,7 @@
1
+ pandas
2
+ matplotlib
3
+ huggingface-hub
4
+ pyarrow
5
+ web3
6
+ requests
7
+ gradio
scripts/markets.py ADDED
@@ -0,0 +1,225 @@
1
+ # -*- coding: utf-8 -*-
2
+ # ------------------------------------------------------------------------------
3
+ #
4
+ # Copyright 2023 Valory AG
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ # ------------------------------------------------------------------------------
19
+
20
+ import functools
21
+ import warnings
22
+ from string import Template
23
+ from typing import Optional, Generator, Callable
24
+
25
+ import pandas as pd
26
+ import requests
27
+ from tqdm import tqdm
28
+
29
+ from typing import List, Dict
30
+
31
+
32
+ ResponseItemType = List[Dict[str, str]]
33
+ SubgraphResponseType = Dict[str, ResponseItemType]
34
+
35
+
36
+ CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
37
+ BATCH_SIZE = 1000
38
+ OMEN_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
39
+ FPMMS_FIELD = "fixedProductMarketMakers"
40
+ QUERY_FIELD = "query"
41
+ ERROR_FIELD = "errors"
42
+ DATA_FIELD = "data"
43
+ ID_FIELD = "id"
44
+ ANSWER_FIELD = "currentAnswer"
45
+ QUESTION_FIELD = "question"
46
+ OUTCOMES_FIELD = "outcomes"
47
+ TITLE_FIELD = "title"
48
+ MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
49
+ DEFAULT_FILENAME = "fpmms.csv"
50
+
51
+ FPMMS_QUERY = Template(
52
+ """
53
+ {
54
+ ${fpmms_field}(
55
+ where: {
56
+ creator: "${creator}",
57
+ id_gt: "${fpmm_id}",
58
+ isPendingArbitration: false
59
+ },
60
+ orderBy: ${id_field}
61
+ first: ${first}
62
+ ){
63
+ ${id_field}
64
+ ${answer_field}
65
+ ${question_field} {
66
+ ${outcomes_field}
67
+ }
68
+ ${title_field}
69
+ }
70
+ }
71
+ """
72
+ )
73
+
74
+
75
+ class RetriesExceeded(Exception):
76
+ """Exception to raise when retries are exceeded during data-fetching."""
77
+
78
+ def __init__(
79
+ self, msg="Maximum retries were exceeded while trying to fetch the data!"
80
+ ):
81
+ super().__init__(msg)
82
+
83
+
84
+ def hacky_retry(func: Callable, n_retries: int = 3) -> Callable:
85
+ """Create a hacky retry strategy.
86
+ Unfortunately, we cannot use `requests.packages.urllib3.util.retry.Retry`,
87
+ because the subgraph does not return the appropriate status codes in case of failure.
88
+ Instead, it always returns code 200. Thus, we raise exceptions manually inside `make_request`,
89
+ catch those exceptions in the hacky retry decorator and try again.
90
+ Finally, if the allowed number of retries is exceeded, we raise a custom `RetriesExceeded` exception.
91
+
92
+ :param func: the input request function.
93
+ :param n_retries: the maximum allowed number of retries.
94
+ :return: The request method with the hacky retry strategy applied.
95
+ """
96
+
97
+ @functools.wraps(func)
98
+ def wrapper_hacky_retry(*args, **kwargs) -> SubgraphResponseType:
99
+ """The wrapper for the hacky retry.
100
+
101
+ :return: a response dictionary.
102
+ """
103
+ retried = 0
104
+
105
+ while retried <= n_retries:
106
+ try:
107
+ if retried > 0:
108
+ warnings.warn(f"Retrying {retried}/{n_retries}...")
109
+
110
+ return func(*args, **kwargs)
111
+ except (ValueError, ConnectionError) as e:
112
+ warnings.warn(e.args[0])
113
+ finally:
114
+ retried += 1
115
+
116
+ raise RetriesExceeded()
117
+
118
+ return wrapper_hacky_retry
119
+
120
+
121
+ @hacky_retry
122
+ def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
123
+ """Query a subgraph.
124
+
125
+ Args:
126
+ url: the subgraph's URL.
127
+ query: the query to be used.
128
+ key: the key to use in order to access the required data.
129
+
130
+ Returns:
131
+ a response dictionary.
132
+ """
133
+ content = {QUERY_FIELD: query}
134
+ headers = {
135
+ "Accept": "application/json",
136
+ "Content-Type": "application/json",
137
+ }
138
+ res = requests.post(url, json=content, headers=headers)
139
+
140
+ if res.status_code != 200:
141
+ raise ConnectionError(
142
+ "Something went wrong while trying to communicate with the subgraph "
143
+ f"(Error: {res.status_code})!\n{res.text}"
144
+ )
145
+
146
+ body = res.json()
147
+ if ERROR_FIELD in body.keys():
148
+ raise ValueError(f"The given query is not correct: {body[ERROR_FIELD]}")
149
+
150
+ data = body.get(DATA_FIELD, {}).get(key, None)
151
+ if data is None:
152
+ raise ValueError(f"Unknown error encountered!\nRaw response: \n{body}")
153
+
154
+ return data
155
+
156
+
157
+ def fpmms_fetcher() -> Generator[ResponseItemType, int, None]:
158
+ """An indefinite fetcher for the FPMMs."""
159
+ while True:
160
+ fpmm_id = yield
161
+ fpmms_query = FPMMS_QUERY.substitute(
162
+ creator=CREATOR,
163
+ fpmm_id=fpmm_id,
164
+ fpmms_field=FPMMS_FIELD,
165
+ first=BATCH_SIZE,
166
+ id_field=ID_FIELD,
167
+ answer_field=ANSWER_FIELD,
168
+ question_field=QUESTION_FIELD,
169
+ outcomes_field=OUTCOMES_FIELD,
170
+ title_field=TITLE_FIELD,
171
+ )
172
+ yield query_subgraph(OMEN_SUBGRAPH, fpmms_query, FPMMS_FIELD)
173
+
174
+
175
+ def fetch_fpmms() -> pd.DataFrame:
176
+ """Fetch all the fpmms of the creator."""
177
+ latest_id = ""
178
+ fpmms = []
179
+ fetcher = fpmms_fetcher()
180
+ for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
181
+ batch = fetcher.send(latest_id)
182
+ if len(batch) == 0:
183
+ break
184
+
185
+ latest_id = batch[-1].get(ID_FIELD, "")
186
+ if latest_id == "":
187
+ raise ValueError(f"Unexpected data format retrieved: {batch}")
188
+
189
+ fpmms.extend(batch)
190
+
191
+ return pd.DataFrame(fpmms)
192
+
193
+
194
+ def get_answer(fpmm: pd.Series) -> str:
195
+ """Get an answer from its index, using Series of an FPMM."""
196
+ return fpmm[QUESTION_FIELD][OUTCOMES_FIELD][fpmm[ANSWER_FIELD]]
197
+
198
+
199
+ def transform_fpmms(fpmms: pd.DataFrame) -> pd.DataFrame:
200
+ """Transform an FPMMS dataframe."""
201
+ transformed = fpmms.dropna()
202
+ transformed = transformed.drop_duplicates([ID_FIELD])
203
+ transformed = transformed.loc[transformed[ANSWER_FIELD] != MAX_UINT_HEX]
204
+ transformed.loc[:, ANSWER_FIELD] = (
205
+ transformed[ANSWER_FIELD].str.slice(-1).astype(int)
206
+ )
207
+ transformed.loc[:, ANSWER_FIELD] = transformed.apply(get_answer, axis=1)
208
+ transformed = transformed.drop(columns=[QUESTION_FIELD])
209
+
210
+ return transformed
211
+
212
+
213
+ def etl(filename: Optional[str] = None) -> pd.DataFrame:
214
+ """Fetch, process, store and return the markets as a Dataframe."""
215
+ fpmms = fetch_fpmms()
216
+ fpmms = transform_fpmms(fpmms)
217
+
218
+ if filename:
219
+ fpmms.to_csv(filename, index=False)
220
+
221
+ return fpmms
222
+
223
+
224
+ if __name__ == "__main__":
225
+ etl(DEFAULT_FILENAME)
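`fetch_fpmms` above drives `fpmms_fetcher` with the two-step generator protocol: each `for` iteration advances the generator to the bare `fpmm_id = yield`, and the following `fetcher.send(latest_id)` delivers the pagination cursor and runs it on to the `yield query_subgraph(...)` that returns the batch. A stripped-down sketch of the same pattern with a fake data source (the `fake_batch` helper is invented for illustration):

```python
from typing import Dict, Generator, List

def fake_batch(after_id: str) -> List[Dict[str, str]]:
    """Stand-in for query_subgraph: return up to two items with ids greater than `after_id`."""
    items = [{"id": f"0x{i:02d}"} for i in range(5)]
    return [item for item in items if item["id"] > after_id][:2]

def fetcher() -> Generator[List[Dict[str, str]], str, None]:
    """Indefinite fetcher mirroring fpmms_fetcher: receive a cursor, then yield a batch."""
    while True:
        after_id = yield              # paused here by the `for` loop
        yield fake_batch(after_id)    # resumed by .send(after_id)

latest_id, results = "", []
gen = fetcher()
for _ in gen:                          # advances to `after_id = yield`
    batch = gen.send(latest_id)        # sends the cursor, receives the next batch
    if not batch:
        break
    latest_id = batch[-1]["id"]
    results.extend(batch)
print(results)  # five items, fetched two at a time
```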
scripts/profitability.py ADDED
@@ -0,0 +1,631 @@
1
+ # -*- coding: utf-8 -*-
2
+ # ------------------------------------------------------------------------------
3
+ #
4
+ # Copyright 2023 Valory AG
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ # ------------------------------------------------------------------------------
19
+
20
+ import time
21
+ import requests
22
+ import datetime
23
+ import pandas as pd
24
+ from collections import defaultdict
25
+ from typing import Any, Union
26
+ from string import Template
27
+ from enum import Enum
28
+ from tqdm import tqdm
29
+ import numpy as np
30
+
31
+
32
+ IRRELEVANT_TOOLS = [
33
+ "openai-text-davinci-002",
34
+ "openai-text-davinci-003",
35
+ "openai-gpt-3.5-turbo",
36
+ "openai-gpt-4",
37
+ "stabilityai-stable-diffusion-v1-5",
38
+ "stabilityai-stable-diffusion-xl-beta-v2-2-2",
39
+ "stabilityai-stable-diffusion-512-v2-1",
40
+ "stabilityai-stable-diffusion-768-v2-1",
41
+ "deepmind-optimization-strong",
42
+ "deepmind-optimization",
43
+ ]
44
+ QUERY_BATCH_SIZE = 1000
45
+ DUST_THRESHOLD = 10000000000000
46
+ INVALID_ANSWER_HEX = (
47
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
48
+ )
49
+ INVALID_ANSWER = -1
50
+ FPMM_CREATOR = "0x89c5cc945dd550bcffb72fe42bff002429f46fec"
51
+ DEFAULT_FROM_DATE = "1970-01-01T00:00:00"
52
+ DEFAULT_TO_DATE = "2038-01-19T03:14:07"
53
+ DEFAULT_FROM_TIMESTAMP = 0
54
+ DEFAULT_TO_TIMESTAMP = 2147483647
55
+ WXDAI_CONTRACT_ADDRESS = "0xe91D153E0b41518A2Ce8Dd3D7944Fa863463a97d"
56
+ DEFAULT_MECH_FEE = 0.01
57
+ DUST_THRESHOLD = 10000000000000
58
+
59
+
60
+ class MarketState(Enum):
61
+ """Market state"""
62
+
63
+ OPEN = 1
64
+ PENDING = 2
65
+ FINALIZING = 3
66
+ ARBITRATING = 4
67
+ CLOSED = 5
68
+
69
+ def __str__(self) -> str:
70
+ """Prints the market status."""
71
+ return self.name.capitalize()
72
+
73
+
74
+ class MarketAttribute(Enum):
75
+ """Attribute"""
76
+
77
+ NUM_TRADES = "Num_trades"
78
+ WINNER_TRADES = "Winner_trades"
79
+ NUM_REDEEMED = "Num_redeemed"
80
+ INVESTMENT = "Investment"
81
+ FEES = "Fees"
82
+ MECH_CALLS = "Mech_calls"
83
+ MECH_FEES = "Mech_fees"
84
+ EARNINGS = "Earnings"
85
+ NET_EARNINGS = "Net_earnings"
86
+ REDEMPTIONS = "Redemptions"
87
+ ROI = "ROI"
88
+
89
+ def __str__(self) -> str:
90
+ """Prints the attribute."""
91
+ return self.value
92
+
93
+ def __repr__(self) -> str:
94
+ """Prints the attribute representation."""
95
+ return self.name
96
+
97
+ @staticmethod
98
+ def argparse(s: str) -> "MarketAttribute":
99
+ """Performs string conversion to MarketAttribute."""
100
+ try:
101
+ return MarketAttribute[s.upper()]
102
+ except KeyError as e:
103
+ raise ValueError(f"Invalid MarketAttribute: {s}") from e
104
+
105
+
106
+ ALL_TRADES_STATS_DF_COLS = [
107
+ "trader_address",
108
+ "trade_id",
109
+ "creation_timestamp",
110
+ "title",
111
+ "market_status",
112
+ "collateral_amount",
113
+ "outcome_index",
114
+ "trade_fee_amount",
115
+ "outcomes_tokens_traded",
116
+ "current_answer",
117
+ "is_invalid",
118
+ "winning_trade",
119
+ "earnings",
120
+ "redeemed",
121
+ "redeemed_amount",
122
+ "num_mech_calls",
123
+ "mech_fee_amount",
124
+ "net_earnings",
125
+ "roi",
126
+ ]
127
+
128
+ SUMMARY_STATS_DF_COLS = [
129
+ "trader_address",
130
+ "num_trades",
131
+ "num_winning_trades",
132
+ "num_redeemed",
133
+ "total_investment",
134
+ "total_trade_fees",
135
+ "num_mech_calls",
136
+ "total_mech_fees",
137
+ "total_earnings",
138
+ "total_redeemed_amount",
139
+ "total_net_earnings",
140
+ "total_net_earnings_wo_mech_fees",
141
+ "total_roi",
142
+ "total_roi_wo_mech_fees",
143
+ "mean_mech_calls_per_trade",
144
+ "mean_mech_fee_amount_per_trade",
145
+ ]
146
+ headers = {
147
+ "Accept": "application/json, multipart/mixed",
148
+ "Content-Type": "application/json",
149
+ }
150
+
151
+
152
+ omen_xdai_trades_query = Template(
153
+ """
154
+ {
155
+ fpmmTrades(
156
+ where: {
157
+ type: Buy,
158
+ fpmm_: {
159
+ creator: "${fpmm_creator}"
160
+ creationTimestamp_gte: "${fpmm_creationTimestamp_gte}",
161
+ creationTimestamp_lt: "${fpmm_creationTimestamp_lte}"
162
+ },
163
+ creationTimestamp_gte: "${creationTimestamp_gte}",
164
+ creationTimestamp_lte: "${creationTimestamp_lte}"
165
+ id_gt: "${id_gt}"
166
+ }
167
+ first: ${first}
168
+ orderBy: id
169
+ orderDirection: asc
170
+ ) {
171
+ id
172
+ title
173
+ collateralToken
174
+ outcomeTokenMarginalPrice
175
+ oldOutcomeTokenMarginalPrice
176
+ type
177
+ creator {
178
+ id
179
+ }
180
+ creationTimestamp
181
+ collateralAmount
182
+ collateralAmountUSD
183
+ feeAmount
184
+ outcomeIndex
185
+ outcomeTokensTraded
186
+ transactionHash
187
+ fpmm {
188
+ id
189
+ outcomes
190
+ title
191
+ answerFinalizedTimestamp
192
+ currentAnswer
193
+ isPendingArbitration
194
+ arbitrationOccurred
195
+ openingTimestamp
196
+ condition {
197
+ id
198
+ }
199
+ }
200
+ }
201
+ }
202
+ """
203
+ )
204
+
205
+
206
+ conditional_tokens_gc_user_query = Template(
207
+ """
208
+ {
209
+ user(id: "${id}") {
210
+ userPositions(
211
+ first: ${first}
212
+ where: {
213
+ id_gt: "${userPositions_id_gt}"
214
+ }
215
+ orderBy: id
216
+ ) {
217
+ balance
218
+ id
219
+ position {
220
+ id
221
+ conditionIds
222
+ }
223
+ totalBalance
224
+ wrappedBalance
225
+ }
226
+ }
227
+ }
228
+ """
229
+ )
230
+
231
+
232
+ def _to_content(q: str) -> dict[str, Any]:
233
+ """Convert the given query string to payload content, i.e., add it under a `queries` key and convert it to bytes."""
234
+ finalized_query = {
235
+ "query": q,
236
+ "variables": None,
237
+ "extensions": {"headers": None},
238
+ }
239
+ return finalized_query
240
+
241
+
242
+ def _query_omen_xdai_subgraph(
243
+ from_timestamp: float,
244
+ to_timestamp: float,
245
+ fpmm_from_timestamp: float,
246
+ fpmm_to_timestamp: float,
247
+ ) -> dict[str, Any]:
248
+ """Query the subgraph."""
249
+ url = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
250
+
251
+ grouped_results = defaultdict(list)
252
+ id_gt = ""
253
+
254
+ while True:
255
+ query = omen_xdai_trades_query.substitute(
256
+ fpmm_creator=FPMM_CREATOR.lower(),
257
+ creationTimestamp_gte=int(from_timestamp),
258
+ creationTimestamp_lte=int(to_timestamp),
259
+ fpmm_creationTimestamp_gte=int(fpmm_from_timestamp),
260
+ fpmm_creationTimestamp_lte=int(fpmm_to_timestamp),
261
+ first=QUERY_BATCH_SIZE,
262
+ id_gt=id_gt,
263
+ )
264
+ content_json = _to_content(query)
265
+ res = requests.post(url, headers=headers, json=content_json)
266
+ result_json = res.json()
267
+ user_trades = result_json.get("data", {}).get("fpmmTrades", [])
268
+
269
+ if not user_trades:
270
+ break
271
+
272
+ for trade in user_trades:
273
+ fpmm_id = trade.get("fpmm", {}).get("id")
274
+ grouped_results[fpmm_id].append(trade)
275
+
276
+ id_gt = user_trades[len(user_trades) - 1]["id"]
277
+
278
+ all_results = {
279
+ "data": {
280
+ "fpmmTrades": [
281
+ trade
282
+ for trades_list in grouped_results.values()
283
+ for trade in trades_list
284
+ ]
285
+ }
286
+ }
287
+
288
+ return all_results
289
+
290
+
291
+ def _query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
292
+ """Query the subgraph."""
293
+ url = "https://api.thegraph.com/subgraphs/name/gnosis/conditional-tokens-gc"
294
+
295
+ all_results: dict[str, Any] = {"data": {"user": {"userPositions": []}}}
296
+ userPositions_id_gt = ""
297
+ while True:
298
+ query = conditional_tokens_gc_user_query.substitute(
299
+ id=creator.lower(),
300
+ first=QUERY_BATCH_SIZE,
301
+ userPositions_id_gt=userPositions_id_gt,
302
+ )
303
+ content_json = {"query": query}
304
+ res = requests.post(url, headers=headers, json=content_json)
305
+ result_json = res.json()
306
+ user_data = result_json.get("data", {}).get("user", {})
307
+
308
+ if not user_data:
309
+ break
310
+
311
+ user_positions = user_data.get("userPositions", [])
312
+
313
+ if user_positions:
314
+ all_results["data"]["user"]["userPositions"].extend(user_positions)
315
+ userPositions_id_gt = user_positions[len(user_positions) - 1]["id"]
316
+ else:
317
+ break
318
+
319
+ if len(all_results["data"]["user"]["userPositions"]) == 0:
320
+ return {"data": {"user": None}}
321
+
322
+ return all_results
323
+
324
+
325
+ def convert_hex_to_int(x: Union[str, float]) -> Union[int, float]:
326
+ """Convert hex to int"""
327
+ if isinstance(x, float):
328
+ return np.nan
329
+ elif isinstance(x, str):
330
+ if x == INVALID_ANSWER_HEX:
331
+ return -1
332
+ else:
333
+ return int(x, 16)
334
+
335
+
336
+ def wei_to_unit(wei: int) -> float:
337
+ """Converts wei to currency unit."""
338
+ return wei / 10**18
339
+
340
+
341
+ def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
342
+ """Returns whether the user has redeemed the position."""
343
+ user_positions = user_json["data"]["user"]["userPositions"]
344
+ outcomes_tokens_traded = int(fpmmTrade["outcomeTokensTraded"])
345
+ condition_id = fpmmTrade["fpmm.condition.id"]
346
+
347
+ for position in user_positions:
348
+ position_condition_ids = position["position"]["conditionIds"]
349
+ balance = int(position["balance"])
350
+
351
+ if condition_id in position_condition_ids:
352
+ if balance == 0:
353
+ return True
354
+ # return early
355
+ return False
356
+ return False
357
+
358
+
359
+ def create_fpmmTrades(rpc: str):
360
+ """Create fpmmTrades for all trades."""
361
+ trades_json = _query_omen_xdai_subgraph(
362
+ from_timestamp=DEFAULT_FROM_TIMESTAMP,
363
+ to_timestamp=DEFAULT_TO_TIMESTAMP,
364
+ fpmm_from_timestamp=DEFAULT_FROM_TIMESTAMP,
365
+ fpmm_to_timestamp=DEFAULT_TO_TIMESTAMP,
366
+ )
367
+
368
+ # convert to dataframe
369
+ df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
370
+
371
+ # convert creator to address
372
+ df["creator"] = df["creator"].apply(lambda x: x["id"])
373
+
374
+ # normalize fpmm column
375
+ fpmm = pd.json_normalize(df["fpmm"])
376
+ fpmm.columns = [f"fpmm.{col}" for col in fpmm.columns]
377
+ df = pd.concat([df, fpmm], axis=1)
378
+
379
+ # drop fpmm column
380
+ df.drop(["fpmm"], axis=1, inplace=True)
381
+
382
+ # change creator to creator_address
383
+ df.rename(columns={"creator": "trader_address"}, inplace=True)
384
+
385
+ # save to csv
386
+ df.to_csv("fpmmTrades.csv", index=False)
387
+
388
+ return df
389
+
390
+
391
+ def prepare_profitalibity_data(rpc: str):
392
+ """Prepare data for profitalibity analysis."""
393
+
394
+ # Check if tools.py is in the same directory
395
+ try:
396
+ # load tools.csv
397
+ tools = pd.read_csv("tools.csv")
398
+
399
+ # make sure creator_address is in the columns
400
+ assert "trader_address" in tools.columns, "trader_address column not found"
401
+
402
+ # lowercase and strip creator_address
403
+ tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
404
+
405
+ # drop duplicates
406
+ tools.drop_duplicates(inplace=True)
407
+
408
+ print("tools.csv loaded")
409
+ except FileNotFoundError:
410
+ print("tools.csv not found. Please run tools.py first.")
411
+ return
412
+
413
+ # Check if fpmmTrades.csv is in the same directory
414
+ try:
415
+ # load fpmmTrades.csv
416
+ fpmmTrades = pd.read_csv("fpmmTrades.csv")
417
+ print("fpmmTrades.csv loaded")
418
+ except FileNotFoundError:
419
+ print("fpmmTrades.csv not found. Creating fpmmTrades.csv...")
420
+ fpmmTrades = create_fpmmTrades(rpc)
421
+ fpmmTrades.to_csv("fpmmTrades.csv", index=False)
422
+ fpmmTrades = pd.read_csv("fpmmTrades.csv")
423
+
424
+ # make sure trader_address is in the columns
425
+ assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
426
+
427
+ # lowercase and strip creator_address
428
+ fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
429
+
430
+ return fpmmTrades, tools
431
+
432
+
433
+ def determine_market_status(trade, current_answer):
434
+ """Determine the market status of a trade."""
435
+ if current_answer is np.nan and time.time() >= trade["fpmm.openingTimestamp"]:
436
+ return MarketState.PENDING
437
+ elif current_answer is np.nan:  # `==` never matches NaN; mirror the check above
438
+ return MarketState.OPEN
439
+ elif trade["fpmm.isPendingArbitration"]:
440
+ return MarketState.ARBITRATING
441
+ elif time.time() < trade["fpmm.answerFinalizedTimestamp"]:
442
+ return MarketState.FINALIZING
443
+ return MarketState.CLOSED
444
+
445
+
446
+ def analyse_trader(
447
+ trader_address: str, fpmmTrades: pd.DataFrame, tools: pd.DataFrame
448
+ ) -> pd.DataFrame:
449
+ """Analyse a trader's trades"""
450
+ # Filter trades and tools for the given trader
451
+ trades = fpmmTrades[fpmmTrades["trader_address"] == trader_address]
452
+ tools_usage = tools[tools["trader_address"] == trader_address]
453
+
454
+ # Prepare the DataFrame
455
+ trades_df = pd.DataFrame(columns=ALL_TRADES_STATS_DF_COLS)
456
+ if trades.empty:
457
+ return trades_df
458
+
459
+ # Fetch user's conditional tokens gc graph
460
+ try:
461
+ user_json = _query_conditional_tokens_gc_subgraph(trader_address)
462
+ except Exception as e:
463
+ print(f"Error fetching user data: {e}")
464
+ return trades_df
465
+
466
+ # Iterate over the trades
467
+ for i, trade in tqdm(trades.iterrows(), total=len(trades), desc="Analysing trades"):
468
+ try:
469
+ # Parsing and computing shared values
470
+ creation_timestamp_utc = datetime.datetime.fromtimestamp(
471
+ trade["creationTimestamp"], tz=datetime.timezone.utc
472
+ )
473
+ collateral_amount = wei_to_unit(float(trade["collateralAmount"]))
474
+ fee_amount = wei_to_unit(float(trade["feeAmount"]))
475
+ outcome_tokens_traded = wei_to_unit(float(trade["outcomeTokensTraded"]))
476
+ earnings, winner_trade = (0, False)
477
+ redemption = _is_redeemed(user_json, trade)
478
+ current_answer = trade["fpmm.currentAnswer"]
479
+
480
+ # Determine market status
481
+ market_status = determine_market_status(trade, current_answer)
482
+
483
+ # Skip non-closed markets
484
+ if market_status != MarketState.CLOSED:
485
+ print(
486
+ f"Skipping trade {i} because market is not closed. Market Status: {market_status}"
487
+ )
488
+ continue
489
+ current_answer = convert_hex_to_int(current_answer)
490
+
491
+ # Compute invalidity
492
+ is_invalid = current_answer == INVALID_ANSWER
493
+
494
+ # Compute earnings and winner trade status
495
+ if is_invalid:
496
+ earnings = collateral_amount
497
+ winner_trade = False
498
+ elif trade["outcomeIndex"] == current_answer:
499
+ earnings = outcome_tokens_traded
500
+ winner_trade = True
501
+
502
+ # Compute mech calls
503
+ num_mech_calls = (
504
+ tools_usage["prompt_request"].apply(lambda x: trade["title"] in x).sum()
505
+ )
506
+ net_earnings = (
507
+ earnings
508
+ - fee_amount
509
+ - (num_mech_calls * DEFAULT_MECH_FEE)
510
+ - collateral_amount
511
+ )
512
+
513
+ # Assign values to DataFrame
514
+ trades_df.loc[i] = {
515
+ "trader_address": trader_address,
516
+ "trade_id": trade["id"],
517
+ "market_status": market_status.name,
518
+ "creation_timestamp": creation_timestamp_utc,
519
+ "title": trade["title"],
520
+ "collateral_amount": collateral_amount,
521
+ "outcome_index": trade["outcomeIndex"],
522
+ "trade_fee_amount": fee_amount,
523
+ "outcomes_tokens_traded": outcome_tokens_traded,
524
+ "current_answer": current_answer,
525
+ "is_invalid": is_invalid,
526
+ "winning_trade": winner_trade,
527
+ "earnings": earnings,
528
+ "redeemed": redemption,
529
+ "redeemed_amount": earnings if redemption else 0,
530
+ "num_mech_calls": num_mech_calls,
531
+ "mech_fee_amount": num_mech_calls * DEFAULT_MECH_FEE,
532
+ "net_earnings": net_earnings,
533
+ "roi": net_earnings / collateral_amount,
534
+ }
535
+
536
+ except Exception as e:
537
+ print(f"Error processing trade {i}: {e}")
538
+ continue
539
+
540
+ return trades_df
541
+
542
+
543
+ def analyse_all_traders(trades: pd.DataFrame, tools: pd.DataFrame) -> pd.DataFrame:
544
+ """Analyse all creators."""
545
+ all_traders = []
546
+ for trader in tqdm(
547
+ trades["trader_address"].unique(),
548
+ total=len(trades["trader_address"].unique()),
549
+ desc="Analysing creators",
550
+ ):
551
+ all_traders.append(analyse_trader(trader, trades, tools))
552
+
553
+ # concat all creators
554
+ all_creators_df = pd.concat(all_traders)
555
+
556
+ return all_creators_df
557
+
558
+
559
+ def summary_analyse(df):
560
+ """Summarise profitability analysis."""
561
+ # Ensure DataFrame is not empty
562
+ if df.empty:
563
+ return pd.DataFrame(columns=SUMMARY_STATS_DF_COLS)
564
+
565
+ # Group by trader_address
566
+ grouped = df.groupby("trader_address")
567
+
568
+ # Create summary DataFrame
569
+ summary_df = grouped.agg(
570
+ num_trades=("trader_address", "size"),
571
+ num_winning_trades=("winning_trade", lambda x: float((x).sum())),
572
+ num_redeemed=("redeemed", lambda x: float(x.sum())),
573
+ total_investment=("collateral_amount", "sum"),
574
+ total_trade_fees=("trade_fee_amount", "sum"),
575
+ num_mech_calls=("num_mech_calls", "sum"),
576
+ total_mech_fees=("mech_fee_amount", "sum"),
577
+ total_earnings=("earnings", "sum"),
578
+ total_redeemed_amount=("redeemed_amount", "sum"),
579
+ total_net_earnings=("net_earnings", "sum"),
580
+ )
581
+
582
+ # Calculating additional columns
583
+ summary_df["total_roi"] = (
584
+ summary_df["total_net_earnings"] / summary_df["total_investment"]
585
+ )
586
+ summary_df["mean_mech_calls_per_trade"] = (
587
+ summary_df["num_mech_calls"] / summary_df["num_trades"]
588
+ )
589
+ summary_df["mean_mech_fee_amount_per_trade"] = (
590
+ summary_df["total_mech_fees"] / summary_df["num_trades"]
591
+ )
592
+ summary_df["total_net_earnings_wo_mech_fees"] = (
593
+ summary_df["total_net_earnings"] + summary_df["total_mech_fees"]
594
+ )
595
+ summary_df["total_roi_wo_mech_fees"] = (
596
+ summary_df["total_net_earnings_wo_mech_fees"] / summary_df["total_investment"]
597
+ )
598
+
599
+ # Resetting index to include trader_address
600
+ summary_df.reset_index(inplace=True)
601
+
602
+ return summary_df
603
+
604
+
605
+ def run_profitability_analysis(rpc):
606
+ """Create all trades analysis."""
607
+
608
+ # load dfs from csv for analysis
609
+ print("Preparing data...")
610
+ fpmmTrades, tools = prepare_profitalibity_data(rpc)
611
+
612
+ # all trades profitability df
613
+ print("Analysing trades...")
614
+ all_trades_df = analyse_all_traders(fpmmTrades, tools)
615
+
616
+ # summarize profitability df
617
+ print("Summarising trades...")
618
+ summary_df = summary_analyse(all_trades_df)
619
+
620
+ # save to csv
621
+ all_trades_df.to_csv("all_trades_profitability.csv", index=False)
622
+ summary_df.to_csv("summary_profitability.csv", index=False)
623
+
624
+ print("Done!")
625
+
626
+ return all_trades_df, summary_df
627
+
628
+
629
+ if __name__ == "__main__":
630
+ rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
631
+ run_profitability_analysis(rpc)
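For a single closed market, `analyse_trader` above nets a trade out as `net_earnings = earnings - trade_fee - num_mech_calls * DEFAULT_MECH_FEE - collateral_amount` and `roi = net_earnings / collateral_amount`. A small worked example with invented numbers for a winning trade (where earnings are the outcome tokens received):

```python
DEFAULT_MECH_FEE = 0.01  # xDAI per mech call, as in the script

# invented trade: 1.0 xDAI collateral buys 1.9 outcome tokens, 0.02 xDAI market fee, 2 mech calls
collateral_amount = 1.0
trade_fee = 0.02
outcome_tokens_traded = 1.9
num_mech_calls = 2

earnings = outcome_tokens_traded  # winning trade: tokens redeem 1:1 against the collateral
net_earnings = earnings - trade_fee - num_mech_calls * DEFAULT_MECH_FEE - collateral_amount
roi = net_earnings / collateral_amount
print(round(net_earnings, 2), round(roi, 2))  # 0.86 0.86
```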
scripts/tools.py ADDED
@@ -0,0 +1,761 @@
1
+ # -*- coding: utf-8 -*-
2
+ # ------------------------------------------------------------------------------
3
+ #
4
+ # Copyright 2023 Valory AG
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ # ------------------------------------------------------------------------------
19
+
20
+ import json
21
+ import os.path
22
+ import re
23
+ import sys
24
+ import time
25
+ import random
26
+ from dataclasses import dataclass
27
+ from enum import Enum
28
+ from io import StringIO
29
+ from typing import (
30
+ Optional,
31
+ List,
32
+ Dict,
33
+ Any,
34
+ Union,
35
+ Callable,
36
+ Tuple,
37
+ )
38
+
39
+ import pandas as pd
40
+ import requests
41
+ from json.decoder import JSONDecodeError
42
+ from eth_typing import ChecksumAddress
43
+ from eth_utils import to_checksum_address
44
+ from requests.adapters import HTTPAdapter
45
+ from requests.exceptions import (
46
+ ReadTimeout as RequestsReadTimeoutError,
47
+ HTTPError as RequestsHTTPError,
48
+ )
49
+ from tqdm import tqdm
50
+ from urllib3 import Retry
51
+ from urllib3.exceptions import (
52
+ ReadTimeoutError as Urllib3ReadTimeoutError,
53
+ HTTPError as Urllib3HTTPError,
54
+ )
55
+ from web3 import Web3, HTTPProvider
56
+ from web3.exceptions import MismatchedABI
57
+ from web3.types import BlockParams
58
+ from concurrent.futures import ThreadPoolExecutor, as_completed
59
+
60
+ CONTRACTS_PATH = "contracts"
61
+ MECH_TO_INFO = {
62
+ # this block number is when the creator had its first tx ever, and after this mech's creation
63
+ "0xff82123dfb52ab75c417195c5fdb87630145ae81": ("old_mech_abi.json", 28911547),
64
+ # this block number is when this mech was created
65
+ "0x77af31de935740567cf4ff1986d04b2c964a786a": ("new_mech_abi.json", 30776879),
66
+ }
67
+ # optionally set the latest block to stop searching for the delivered events
68
+ LATEST_BLOCK: Optional[int] = None
69
+ LATEST_BLOCK_NAME: BlockParams = "latest"
70
+ BLOCK_DATA_NUMBER = "number"
71
+ BLOCKS_CHUNK_SIZE = 10_000
72
+ REDUCE_FACTOR = 0.25
73
+ EVENT_ARGUMENTS = "args"
74
+ DATA = "data"
75
+ REQUEST_ID = "requestId"
76
+ REQUEST_ID_FIELD = "request_id"
77
+ REQUEST_SENDER = "sender"
78
+ PROMPT_FIELD = "prompt"
79
+ BLOCK_FIELD = "block"
80
+ CID_PREFIX = "f01701220"
81
+ HTTP = "http://"
82
+ HTTPS = HTTP[:4] + "s" + HTTP[4:]
83
+ IPFS_ADDRESS = f"{HTTPS}gateway.autonolas.tech/ipfs/"
84
+ IPFS_LINKS_SERIES_NAME = "ipfs_links"
85
+ BACKOFF_FACTOR = 1
86
+ STATUS_FORCELIST = [404, 500, 502, 503, 504]
87
+ DEFAULT_FILENAME = "tools.csv"
88
+ RE_RPC_FILTER_ERROR = r"Filter with id: '\d+' does not exist."
89
+ ABI_ERROR = "The event signature did not match the provided ABI"
90
+ SLEEP = 0.5
91
+ HTTP_TIMEOUT = 10
92
+ N_IPFS_RETRIES = 1
93
+ N_RPC_RETRIES = 100
94
+ RPC_POLL_INTERVAL = 0.05
95
+ IPFS_POLL_INTERVAL = 0.05
96
+ FORMAT_UPDATE_BLOCK_NUMBER = 30411638
97
+ IRRELEVANT_TOOLS = [
98
+ "openai-text-davinci-002",
99
+ "openai-text-davinci-003",
100
+ "openai-gpt-3.5-turbo",
101
+ "openai-gpt-4",
102
+ "stabilityai-stable-diffusion-v1-5",
103
+ "stabilityai-stable-diffusion-xl-beta-v2-2-2",
104
+ "stabilityai-stable-diffusion-512-v2-1",
105
+ "stabilityai-stable-diffusion-768-v2-1",
106
+ "deepmind-optimization-strong",
107
+ "deepmind-optimization",
108
+ ]
109
+ # this is how frequently we will keep a snapshot of the progress so far in terms of blocks' batches
110
+ # for example, the value 1 means that for every `BLOCKS_CHUNK_SIZE` blocks that we search, we also store the snapshot
111
+ SNAPSHOT_RATE = 10
112
+ NUM_WORKERS = 10
113
+ GET_CONTENTS_BATCH_SIZE = 1000
114
+
115
+
116
+ class MechEventName(Enum):
117
+ """The mech's event names."""
118
+
119
+ REQUEST = "Request"
120
+ DELIVER = "Deliver"
121
+
122
+
123
+ @dataclass
124
+ class MechEvent:
125
+ """A mech's on-chain event representation."""
126
+
127
+ for_block: int
128
+ requestId: int
129
+ data: bytes
130
+ sender: str
131
+
132
+ def _ipfs_link(self) -> Optional[str]:
133
+ """Get the ipfs link for the data."""
134
+ return f"{IPFS_ADDRESS}{CID_PREFIX}{self.data.hex()}"
135
+
136
+ @property
137
+ def ipfs_request_link(self) -> Optional[str]:
138
+ """Get the IPFS link for the request."""
139
+ return f"{self._ipfs_link()}/metadata.json"
140
+
141
+ @property
142
+ def ipfs_deliver_link(self) -> Optional[str]:
143
+ """Get the IPFS link for the deliver."""
144
+ if self.requestId is None:
145
+ return None
146
+ return f"{self._ipfs_link()}/{self.requestId}"
147
+
148
+ def ipfs_link(self, event_name: MechEventName) -> Optional[str]:
149
+ """Get the ipfs link based on the event."""
150
+ if event_name == MechEventName.REQUEST:
151
+ if self.for_block < FORMAT_UPDATE_BLOCK_NUMBER:
152
+ return self._ipfs_link()
153
+ return self.ipfs_request_link
154
+ if event_name == MechEventName.DELIVER:
155
+ return self.ipfs_deliver_link
156
+ return None
157
+
158
+
159
+ @dataclass(init=False)
160
+ class MechRequest:
161
+ """A structure for a request to a mech."""
162
+
163
+ request_id: Optional[int]
164
+ request_block: Optional[int]
165
+ prompt_request: Optional[str]
166
+ tool: Optional[str]
167
+ nonce: Optional[str]
168
+ trader_address: Optional[str]
169
+
170
+ def __init__(self, **kwargs: Any) -> None:
171
+ """Initialize the request ignoring extra keys."""
172
+ self.request_id = int(kwargs.pop(REQUEST_ID, 0))
173
+ self.request_block = int(kwargs.pop(BLOCK_FIELD, 0))
174
+ self.prompt_request = kwargs.pop(PROMPT_FIELD, None)
175
+ self.tool = kwargs.pop("tool", None)
176
+ self.nonce = kwargs.pop("nonce", None)
177
+ self.trader_address = kwargs.pop("sender", None)
178
+
179
+
180
+ @dataclass(init=False)
181
+ class PredictionResponse:
182
+ """A response of a prediction."""
183
+
184
+ p_yes: float
185
+ p_no: float
186
+ confidence: float
187
+ info_utility: float
188
+ vote: Optional[str]
189
+ win_probability: Optional[float]
190
+
191
+ def __init__(self, **kwargs: Any) -> None:
192
+ """Initialize the mech's prediction ignoring extra keys."""
193
+ try:
194
+ self.p_yes = float(kwargs.pop("p_yes"))
195
+ self.p_no = float(kwargs.pop("p_no"))
196
+ self.confidence = float(kwargs.pop("confidence"))
197
+ self.info_utility = float(kwargs.pop("info_utility"))
198
+ self.win_probability = 0
199
+
200
+ # Validate probabilities
201
+ probabilities = {
202
+ "p_yes": self.p_yes,
203
+ "p_no": self.p_no,
204
+ "confidence": self.confidence,
205
+ "info_utility": self.info_utility,
206
+ }
207
+
208
+ for name, prob in probabilities.items():
209
+ if not 0 <= prob <= 1:
210
+ raise ValueError(f"{name} probability is out of bounds: {prob}")
211
+
212
+ if self.p_yes + self.p_no != 1:
213
+ raise ValueError(
214
+ f"Sum of p_yes and p_no is not 1: {self.p_yes} + {self.p_no}"
215
+ )
216
+
217
+ self.vote = self.get_vote()
218
+ self.win_probability = self.get_win_probability()
219
+
220
+ except KeyError as e:
221
+ raise KeyError(f"Missing key in PredictionResponse: {e}")
222
+ except ValueError as e:
223
+ raise ValueError(f"Invalid value in PredictionResponse: {e}")
224
+
225
+ def get_vote(self) -> Optional[str]:
226
+ """Return the vote."""
227
+ if self.p_no == self.p_yes:
228
+ return None
229
+ if self.p_no > self.p_yes:
230
+ return "No"
231
+ return "Yes"
232
+
233
+ def get_win_probability(self) -> Optional[float]:
234
+ """Return the probability estimation for winning with vote."""
235
+ return max(self.p_no, self.p_yes)
236
+
237
+
238
+ @dataclass(init=False)
239
+ class MechResponse:
240
+ """A structure for the response of a mech."""
241
+
242
+ request_id: int
243
+ deliver_block: Optional[int]
244
+ result: Optional[PredictionResponse]
245
+ error: Optional[str]
246
+ error_message: Optional[str]
247
+ prompt_response: Optional[str]
248
+ mech_address: Optional[str]
249
+
250
+ def __init__(self, **kwargs: Any) -> None:
251
+ """Initialize the mech's response ignoring extra keys."""
252
+ self.error = kwargs.get("error", None)
253
+ self.request_id = int(kwargs.get(REQUEST_ID, 0))
254
+ self.deliver_block = int(kwargs.get(BLOCK_FIELD, 0))
255
+ self.result = kwargs.get("result", None)
256
+ self.prompt_response = kwargs.get(PROMPT_FIELD, None)
257
+ self.mech_address = kwargs.get("sender", None)
258
+
259
+ if self.result != "Invalid response":
260
+ self.error_message = kwargs.get("error_message", None)
261
+
262
+ try:
263
+ if isinstance(self.result, str):
264
+ kwargs = json.loads(self.result)
265
+ self.result = PredictionResponse(**kwargs)
266
+ self.error = str(False)
267
+
268
+ except JSONDecodeError:
269
+ self.error_message = "Response parsing error"
270
+ self.error = str(True)
271
+
272
+ except Exception as e:
273
+ self.error_message = str(e)
274
+ self.error = str(True)
275
+
276
+ else:
277
+ self.error_message = "Invalid response from tool"
278
+ self.error = str(True)
279
+ self.result = None
280
+
281
+
282
+ EVENT_TO_MECH_STRUCT = {
283
+ MechEventName.REQUEST: MechRequest,
284
+ MechEventName.DELIVER: MechResponse,
285
+ }
286
+
287
+
288
+ def parse_args() -> str:
289
+ """Parse the arguments and return the RPC."""
290
+ if len(sys.argv) != 2:
291
+ raise ValueError("Expected the RPC as a positional argument.")
292
+ return sys.argv[1]
293
+
294
+
295
+ def read_abi(abi_path: str) -> str:
296
+ """Read and return the wxDAI contract's ABI."""
297
+ with open(abi_path) as abi_file:
298
+ return abi_file.read()
299
+
300
+
301
+ def reduce_window(contract_instance, event, from_block, batch_size, latest_block):
302
+ """Dynamically reduce the batch size window."""
303
+ keep_fraction = 1 - REDUCE_FACTOR
304
+ events_filter = contract_instance.events[event].build_filter()
305
+ events_filter.fromBlock = from_block
306
+ batch_size = int(batch_size * keep_fraction)
307
+ events_filter.toBlock = min(from_block + batch_size, latest_block)
308
+ tqdm.write(f"RPC timed out! Resizing batch size to {batch_size}.")
309
+ time.sleep(SLEEP)
310
+ return events_filter, batch_size
311
+
312
+
313
+ def get_events(
314
+ w3: Web3,
315
+ event: str,
316
+ mech_address: ChecksumAddress,
317
+ mech_abi_path: str,
318
+ earliest_block: int,
319
+ latest_block: int,
320
+ ) -> List:
321
+ """Get the delivered events."""
322
+ abi = read_abi(mech_abi_path)
323
+ contract_instance = w3.eth.contract(address=mech_address, abi=abi)
324
+
325
+ events = []
326
+ from_block = earliest_block
327
+ batch_size = BLOCKS_CHUNK_SIZE
328
+ with tqdm(
329
+ total=latest_block - from_block,
330
+ desc=f"Searching {event} events for mech {mech_address}",
331
+ unit="blocks",
332
+ ) as pbar:
333
+ while from_block < latest_block:
334
+ events_filter = contract_instance.events[event].build_filter()
335
+ events_filter.fromBlock = from_block
336
+ events_filter.toBlock = min(from_block + batch_size, latest_block)
337
+
338
+ entries = None
339
+ retries = 0
340
+ while entries is None:
341
+ try:
342
+ entries = events_filter.deploy(w3).get_all_entries()
343
+ retries = 0
344
+ except (RequestsHTTPError, Urllib3HTTPError) as exc:
345
+ if "Request Entity Too Large" in exc.args[0]:
346
+ events_filter, batch_size = reduce_window(
347
+ contract_instance,
348
+ event,
349
+ from_block,
350
+ batch_size,
351
+ latest_block,
352
+ )
353
+ except (Urllib3ReadTimeoutError, RequestsReadTimeoutError):
354
+ events_filter, batch_size = reduce_window(
355
+ contract_instance, event, from_block, batch_size, latest_block
356
+ )
357
+ except Exception as exc:
358
+ retries += 1
359
+ if retries == N_RPC_RETRIES:
360
+ tqdm.write(
361
+ f"Skipping events for blocks {events_filter.fromBlock} - {events_filter.toBlock} "
362
+ f"as the retries have been exceeded."
363
+ )
364
+ break
365
+ sleep = SLEEP * retries
366
+ if (
367
+ (
368
+ isinstance(exc, ValueError)
369
+ and re.match(
370
+ RE_RPC_FILTER_ERROR, exc.args[0].get("message", "")
371
+ )
372
+ is None
373
+ )
374
+ and not isinstance(exc, ValueError)
375
+ and not isinstance(exc, MismatchedABI)
376
+ ):
377
+ tqdm.write(
378
+ f"An error was raised from the RPC: {exc}\n Retrying in {sleep} seconds."
379
+ )
380
+ time.sleep(sleep)
381
+
382
+ from_block += batch_size
383
+ pbar.update(batch_size)
384
+
385
+ if entries is None:
386
+ continue
387
+
388
+ chunk = list(entries)
389
+ events.extend(chunk)
390
+ time.sleep(RPC_POLL_INTERVAL)
391
+
392
+ return events
393
+
394
+
395
+ def parse_events(raw_events: List) -> List[MechEvent]:
396
+ """Parse all the specified MechEvents."""
397
+ parsed_events = []
398
+ for event in raw_events:
399
+ for_block = event.get("blockNumber", 0)
400
+ args = event.get(EVENT_ARGUMENTS, {})
401
+ request_id = args.get(REQUEST_ID, 0)
402
+ data = args.get(DATA, b"")
403
+ sender = args.get(REQUEST_SENDER, "")
404
+ parsed_event = MechEvent(for_block, request_id, data, sender)
405
+ parsed_events.append(parsed_event)
406
+
407
+ return parsed_events
408
+
409
+
410
+ def create_session() -> requests.Session:
411
+ """Create a session with a retry strategy."""
412
+ session = requests.Session()
413
+ retry_strategy = Retry(
414
+ total=N_IPFS_RETRIES + 1,
415
+ backoff_factor=BACKOFF_FACTOR,
416
+ status_forcelist=STATUS_FORCELIST,
417
+ )
418
+ adapter = HTTPAdapter(max_retries=retry_strategy)
419
+ for protocol in (HTTP, HTTPS):
420
+ session.mount(protocol, adapter)
421
+
422
+ return session
423
+
424
+
425
+ def request(
+     session: requests.Session, url: str, timeout: int = HTTP_TIMEOUT
+ ) -> Optional[requests.Response]:
+     """Perform a request with a session."""
+     try:
+         response = session.get(url, timeout=timeout)
+         response.raise_for_status()
+     except requests.exceptions.HTTPError as exc:
+         tqdm.write(f"HTTP error occurred: {exc}.")
+     except Exception as exc:
+         tqdm.write(f"Unexpected error occurred: {exc}.")
+     else:
+         return response
+     return None
+
+
+ def limit_text(text: str, limit: int = 200) -> str:
+     """Limit the given text."""
+     if len(text) > limit:
+         return f"{text[:limit]}..."
+     return text
+
+
+ def parse_ipfs_response(
+     session: requests.Session,
+     url: str,
+     event: MechEvent,
+     event_name: MechEventName,
+     response: requests.Response,
+ ) -> Optional[Dict[str, str]]:
+     """Parse a response from IPFS."""
+     try:
+         return response.json()
+     except requests.exceptions.JSONDecodeError:
+         # this is a workaround because the `metadata.json` file was introduced and removed multiple times
+         if event_name == MechEventName.REQUEST and url != event.ipfs_request_link:
+             url = event.ipfs_request_link
+             response = request(session, url)
+             if response is None:
+                 tqdm.write(f"Skipping {event=}.")
+                 return None
+
+             try:
+                 return response.json()
+             except requests.exceptions.JSONDecodeError:
+                 pass
+
+     tqdm.write(f"Failed to parse response into json for {url=}.")
+     return None
+
+
+ def parse_ipfs_tools_content(
+     raw_content: Dict[str, str], event: MechEvent, event_name: MechEventName
+ ) -> Optional[Union[MechRequest, MechResponse]]:
+     """Parse tools content from IPFS."""
+     struct = EVENT_TO_MECH_STRUCT.get(event_name)
+     raw_content[REQUEST_ID] = str(event.requestId)
+     raw_content[BLOCK_FIELD] = str(event.for_block)
+     raw_content["sender"] = str(event.sender)
+
+     try:
+         mech_response = struct(**raw_content)
+     except (ValueError, TypeError, KeyError):
+         tqdm.write(f"Could not parse {limit_text(str(raw_content))}")
+         return None
+
+     if event_name == MechEventName.REQUEST and mech_response.tool in IRRELEVANT_TOOLS:
+         return None
+
+     return mech_response
+
+
+ def get_contents(
+     session: requests.Session, events: List[MechEvent], event_name: MechEventName
+ ) -> pd.DataFrame:
+     """Fetch the tools' responses."""
+     contents = []
+     for event in tqdm(events, desc="Tools' results", unit="results"):
+         url = event.ipfs_link(event_name)
+         response = request(session, url)
+         if response is None:
+             tqdm.write(f"Skipping {event=}.")
+             continue
+
+         raw_content = parse_ipfs_response(session, url, event, event_name, response)
+         if raw_content is None:
+             continue
+
+         mech_response = parse_ipfs_tools_content(raw_content, event, event_name)
+         if mech_response is None:
+             continue
+         contents.append(mech_response)
+         time.sleep(IPFS_POLL_INTERVAL)
+
+     return pd.DataFrame(contents)
+
+
+ def check_for_dicts(df: pd.DataFrame) -> List[str]:
+     """Check for columns that contain dictionaries."""
+     dict_columns = []
+     for column in df.columns:
+         if df[column].apply(lambda x: isinstance(x, dict)).any():
+             dict_columns.append(column)
+     return dict_columns
+
+
+ def drop_dict_rows(df: pd.DataFrame, dict_columns: List[str]) -> pd.DataFrame:
+     """Drop rows that contain dictionaries."""
+     for column in dict_columns:
+         df = df[~df[column].apply(lambda x: isinstance(x, dict))]
+     return df
+
+
+ def clean(df: pd.DataFrame) -> pd.DataFrame:
+     """Clean the dataframe."""
+     dict_columns = check_for_dicts(df)
+     df = drop_dict_rows(df, dict_columns)
+     cleaned = df.drop_duplicates()
+     cleaned[REQUEST_ID_FIELD] = cleaned[REQUEST_ID_FIELD].astype("str")
+     return cleaned
+
+
+ def transform_request(contents: pd.DataFrame) -> pd.DataFrame:
+     """Transform the requests dataframe."""
+     return clean(contents)
+
+
+ def transform_deliver(contents: pd.DataFrame, full_contents=False) -> pd.DataFrame:
+     """Transform the delivers dataframe."""
+     unpacked_result = pd.json_normalize(contents.result)
+     # drop the result column if it exists
+     if "result" in unpacked_result.columns:
+         unpacked_result.drop(columns=["result"], inplace=True)
+
+     # drop the prompt column if it exists
+     if "prompt" in unpacked_result.columns:
+         unpacked_result.drop(columns=["prompt"], inplace=True)
+
+     contents = pd.concat((contents, unpacked_result), axis=1)
+
+     if "result" in contents.columns:
+         contents.drop(columns=["result"], inplace=True)
+
+     if "prompt" in contents.columns:
+         contents.drop(columns=["prompt"], inplace=True)
+
+     return clean(contents)
+
+
+ def gen_event_filename(event_name: MechEventName) -> str:
+     """Generate the filename of an event."""
+     return f"{event_name.value.lower()}s.csv"
+
+
+ def read_n_last_lines(filename: str, n: int = 1) -> str:
+     """Return the `n` last lines' content of a file."""
+     num_newlines = 0
+     with open(filename, "rb") as f:
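+         # Scan backwards from the end of the file two bytes at a time, counting newlines
+         # until `n` lines have been passed; files shorter than that fall back to the start.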
+         try:
+             f.seek(-2, os.SEEK_END)
+             while num_newlines < n:
+                 f.seek(-2, os.SEEK_CUR)
+                 if f.read(1) == b"\n":
+                     num_newlines += 1
+         except OSError:
+             f.seek(0)
+         last_line = f.readline().decode()
+     return last_line
+
+
+ def get_earliest_block(event_name: MechEventName) -> int:
+     """Get the earliest block number to use when filtering for events."""
+     filename = gen_event_filename(event_name)
+     if not os.path.exists(filename):
+         return 0
+
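+     # Resume from the last block stored in the existing CSV, reading only the header and
+     # the final data row instead of loading the whole file.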
+     cols = pd.read_csv(filename, index_col=0, nrows=0).columns.tolist()
+     last_line_buff = StringIO(read_n_last_lines(filename))
+     last_line_series = pd.read_csv(last_line_buff, names=cols)
+     block_field = f"{event_name.value.lower()}_{BLOCK_FIELD}"
+     return int(last_line_series[block_field].values[0])
+
+
+ def store_progress(
+     filename: str,
+     event_to_contents: Dict[MechEventName, pd.DataFrame],
+     tools: pd.DataFrame,
+ ) -> None:
+     """Store the given progress."""
+     if filename:
+         for event_name, content in event_to_contents.items():
+             event_filename = gen_event_filename(event_name)
+
+             if "result" in content.columns:
+                 content.drop(columns=["result"], inplace=True)
+
+             content.to_csv(event_filename, index=False, escapechar="\\")
+
+         # drop the result column before storing the merged tools file
+         if "result" in tools.columns:
+             tools.drop(columns=["result"], inplace=True)
+
+         tools.to_csv(filename, index=False, escapechar="\\")
+
+
+ def etl(
+     rpcs: List[str], filename: Optional[str] = None, full_contents: bool = True
+ ) -> pd.DataFrame:
+     """Fetch from on-chain events, process, store and return the tools' results on all the questions as a Dataframe."""
+     w3s = [Web3(HTTPProvider(r)) for r in rpcs]
+     session = create_session()
+     event_to_transformer = {
+         MechEventName.REQUEST: transform_request,
+         MechEventName.DELIVER: transform_deliver,
+     }
+     mech_to_info = {
+         to_checksum_address(address): (
+             os.path.join(CONTRACTS_PATH, filename),
+             earliest_block,
+         )
+         for address, (filename, earliest_block) in MECH_TO_INFO.items()
+     }
+     event_to_contents = {}
+
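+     # Resolve the upper block bound once so every mech and event type scans the same range.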
+     latest_block = LATEST_BLOCK
+     if latest_block is None:
+         latest_block = w3s[0].eth.get_block(LATEST_BLOCK_NAME)[BLOCK_DATA_NUMBER]
+
+     next_start_block = None
+
+     # Loop through events in event_to_transformer
+     for event_name, transformer in event_to_transformer.items():
+         if next_start_block is None:
+             next_start_block_base = get_earliest_block(event_name)
+
+         # Loop through mech addresses in mech_to_info
+         events = []
+         for address, (abi, earliest_block) in mech_to_info.items():
+             if next_start_block_base == 0:
+                 next_start_block = earliest_block
+             else:
+                 next_start_block = next_start_block_base
+
+             print(
+                 f"Searching for {event_name.value} events for mech {address} from block {next_start_block} to {latest_block}."
+             )
+
+             # parallelize the fetching of events
+             with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
+                 futures = []
+                 for i in range(
+                     next_start_block, latest_block, BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE
+                 ):
+                     futures.append(
+                         executor.submit(
+                             get_events,
+                             random.choice(w3s),
+                             event_name.value,
+                             address,
+                             abi,
+                             i,
+                             min(i + BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE, latest_block),
+                         )
+                     )
+
+                 for future in tqdm(
+                     as_completed(futures),
+                     total=len(futures),
+                     desc=f"Fetching {event_name.value} Events",
+                 ):
+                     current_mech_events = future.result()
+                     events.extend(current_mech_events)
+
+         parsed = parse_events(events)
+
+         contents = []
+         with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
+             futures = []
+             for i in range(0, len(parsed), GET_CONTENTS_BATCH_SIZE):
+                 futures.append(
+                     executor.submit(
+                         get_contents,
+                         session,
+                         parsed[i : i + GET_CONTENTS_BATCH_SIZE],
+                         event_name,
+                     )
+                 )
+
+             for future in tqdm(
+                 as_completed(futures),
+                 total=len(futures),
+                 desc=f"Fetching {event_name.value} Contents",
+             ):
+                 current_mech_contents = future.result()
+                 contents.append(current_mech_contents)
+
+         contents = pd.concat(contents, ignore_index=True)
+
+         full_contents = True
+         if event_name == MechEventName.REQUEST:
+             transformed = transformer(contents)
+         elif event_name == MechEventName.DELIVER:
+             transformed = transformer(contents, full_contents=full_contents)
+
+         events_filename = gen_event_filename(event_name)
+
+         if os.path.exists(events_filename):
+             old = pd.read_csv(events_filename)
+
+             # Reset index to avoid index conflicts
+             old.reset_index(drop=True, inplace=True)
+             transformed.reset_index(drop=True, inplace=True)
+
+             # Concatenate DataFrames
+             transformed = pd.concat([old, transformed], ignore_index=True)
+
+             # Drop duplicates if necessary
+             transformed.drop_duplicates(subset=REQUEST_ID_FIELD, inplace=True)
+
+         event_to_contents[event_name] = transformed.copy()
+
+     # Store progress
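+     # Join the Request and Deliver frames on the request id to build the tools dataframe.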
+     tools = pd.merge(*event_to_contents.values(), on=REQUEST_ID_FIELD)
+     store_progress(filename, event_to_contents, tools)
+
+     return tools
+
+
+ if __name__ == "__main__":
+     RPCs = [
+         "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a",
+     ]
+
+     tools = etl(rpcs=RPCs, filename=DEFAULT_FILENAME, full_contents=True)
scripts/weekly_analysis.py ADDED
@@ -0,0 +1,119 @@
+ import logging
+ import re
+ import os
+ from datetime import datetime
+ from concurrent.futures import ThreadPoolExecutor
+ from tqdm import tqdm
+ from web3 import Web3
+ from typing import Optional
+ import pandas as pd
+ from functools import partial
+ from markets import (
+     etl as mkt_etl,
+     DEFAULT_FILENAME as MARKETS_FILENAME,
+ )
+ from tools import (
+     etl as tools_etl,
+     DEFAULT_FILENAME as TOOLS_FILENAME,
+ )
+ from profitability import run_profitability_analysis
+
+
+ logging.basicConfig(level=logging.INFO)
+
+
+ def get_question(text: str) -> Optional[str]:
+     """Get the question from a text."""
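+     # For example, a prompt containing ... "Will <event> happen by <date>?" ... yields the
+     # double-quoted question; a prompt without quoted text yields None.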
+     # Regex to find text within double quotes
+     pattern = r'"([^"]*)"'
+
+     # Find all occurrences
+     questions = re.findall(pattern, text)
+
+     # Use the first question if there are multiple
+     question = questions[0] if questions else None
+
+     return question
+
+
+ def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:
+     """Get the current answer for a question."""
+     row = fpmms[fpmms['title'] == text]
+     if row.shape[0] == 0:
+         return None
+     return row['currentAnswer'].values[0]
+
+
+ def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
+     """Convert a block number to a timestamp."""
+     block = web3.eth.get_block(block_number)
+     timestamp = datetime.utcfromtimestamp(block['timestamp'])
+     return timestamp.strftime('%Y-%m-%d %H:%M:%S')
+
+
+ def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:
+     """Parallelize the timestamp conversion."""
+     block_numbers = df['request_block'].tolist()
+     with ThreadPoolExecutor(max_workers=10) as executor:
+         results = list(tqdm(executor.map(function, block_numbers), total=len(block_numbers)))
+     return results
+
+
+ def weekly_analysis():
+     """Run weekly analysis for the FPMMS project."""
+     rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
+     web3 = Web3(Web3.HTTPProvider(rpc))
+
+     # Run markets ETL
+     logging.info("Running markets ETL")
+     mkt_etl(MARKETS_FILENAME)
+     logging.info("Markets ETL completed")
+
+     # Run tools ETL
+     logging.info("Running tools ETL")
+     tools_etl(
+         rpcs=[rpc],
+         filename=TOOLS_FILENAME,
+         full_contents=True,
+     )
+     logging.info("Tools ETL completed")
+
+     # Run profitability analysis
+     logging.info("Running profitability analysis")
+     if os.path.exists("fpmmTrades.csv"):
+         os.remove("fpmmTrades.csv")
+     run_profitability_analysis(
+         rpc=rpc,
+     )
+     logging.info("Profitability analysis completed")
+
+     # Get currentAnswer from FPMMS
+     fpmms = pd.read_csv(MARKETS_FILENAME)
+     tools = pd.read_csv(TOOLS_FILENAME)
+
+     # Get the question from the tools
+     logging.info("Getting the question and current answer for the tools")
+     tools['title'] = tools['prompt_request'].apply(lambda x: get_question(x))
+     tools['currentAnswer'] = tools['title'].apply(lambda x: current_answer(x, fpmms))
+
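+     # Normalise the casing of the market answers: 'yes' -> 'Yes', 'no' -> 'No'.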
+     tools['currentAnswer'] = tools['currentAnswer'].str.replace('yes', 'Yes')
+     tools['currentAnswer'] = tools['currentAnswer'].str.replace('no', 'No')
+
+     # Convert block number to timestamp
+     logging.info("Converting block number to timestamp")
+
+     partial_block_number_to_timestamp = partial(block_number_to_timestamp, web3=web3)
+     missing_timestamps = parallelize_timestamp_conversion(tools, partial_block_number_to_timestamp)
+     tools['request_time'] = missing_timestamps
+     tools['request_month_year'] = pd.to_datetime(tools['request_time']).dt.strftime('%Y-%m')
+     tools['request_month_year_week'] = pd.to_datetime(tools['request_time']).dt.to_period('W').astype(str)
+
+     # Save the tools
+     tools.to_csv(TOOLS_FILENAME, index=False)
+
+     logging.info("Weekly analysis files generated and saved")
+
+
+ if __name__ == "__main__":
+     weekly_analysis()
+