arshy committed on
Commit
0869b01
1 Parent(s): 8801d28

initial commit

.gitattributes CHANGED
@@ -16,6 +16,7 @@
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.csv filter=lfs diff=lfs merge=lfs -text
20
  *.pb filter=lfs diff=lfs merge=lfs -text
21
  *.pickle filter=lfs diff=lfs merge=lfs -text
22
  *.pkl filter=lfs diff=lfs merge=lfs -text
app copy.py ADDED
@@ -0,0 +1,135 @@
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ tools = pd.read_csv("./data/tools.csv")
5
+ # all_trades = pd.read_csv('./data/all_trades_profitability.csv')
6
+
7
+ demo = gr.Blocks()
8
+
9
+ INC_TOOLS = [
10
+ 'prediction-online',
11
+ 'prediction-offline',
12
+ 'claude-prediction-online',
13
+ 'claude-prediction-offline',
14
+ 'prediction-offline-sme',
15
+ 'prediction-online-sme',
16
+ 'prediction-request-rag',
17
+ 'prediction-request-reasoning',
18
+ 'prediction-url-cot-claude',
19
+ 'prediction-request-rag-claude',
20
+ 'prediction-request-reasoning-claude'
21
+ ]
22
+
23
+ def set_error(row):
24
+ if row.error not in [True, False]:
25
+ if not row.prompt_response:
26
+ return True
27
+ return False
28
+ return row.error
29
+
30
+ def get_error_data():
31
+ tools_inc = tools[tools['tool'].isin(INC_TOOLS)]
32
+ tools_inc['error'] = tools_inc.apply(set_error, axis=1)
33
+ error = tools_inc.groupby(['tool', 'request_month_year_week', 'error']).size().unstack().fillna(0).reset_index()
34
+ error['error_perc'] = (error[True] / (error[False] + error[True]))*100
35
+ error['total_requests'] = error[False] + error[True]
36
+
37
+ return error
38
+
39
+ def get_error_data_all(error):
40
+ error_total = error.groupby('request_month_year_week').agg({'total_requests': 'sum', False: 'sum', True:'sum'}).reset_index()
41
+ error_total['error_perc'] = (error_total[True] / error_total['total_requests'])*100
42
+ # convert column names to strings
43
+ error_total.columns = error_total.columns.astype(str)
44
+ # format all values to 4 decimal places for error_perc
45
+ error_total['error_perc'] = error_total['error_perc'].apply(lambda x: round(x, 4))
46
+ return error_total
47
+
48
+ error = get_error_data()
49
+ error_all = get_error_data_all(error)
50
+ print(error_all.head())
51
+
52
+ with demo:
53
+ gr.HTML("<h1>Olas Predict Actual Performance</h1>")
54
+ gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
55
+
56
+ with gr.Tabs():
57
+ with gr.TabItem("🔥 Error Dashboard"):
58
+ with gr.Row():
59
+ gr.Markdown("This plot shows the percentage of requests that resulted in an error.")
60
+ with gr.Row():
61
+ # plot
62
+ with gr.Column():
63
+ gr.LinePlot(
64
+ value=error_all,
65
+ x="request_month_year_week",
66
+ y="error_perc",
67
+ title="Error Percentage",
68
+ x_title="Week",
69
+ y_title="Error Percentage",
70
+ height=400,
71
+ show_label=True
72
+ )
73
+ gr.Markdown("This plot shows the percentage of requests that resulted in an error.")
74
+
75
+ # Dropdown for selecting the tool
76
+ sel_tool = gr.Dropdown(
77
+ value="prediction-online",
78
+ choices=INC_TOOLS,
79
+ label="Select a tool"
80
+ )
81
+ plot_tool_error = gr.LinePlot(
82
+ title="Error Percentage",
83
+ x_title="Week",
84
+ y_title="Error Percentage",
85
+ render=False
86
+ )
87
+
88
+ # Dropdown for selecting the week
89
+ sel_week = gr.Dropdown(
90
+ value=error['request_month_year_week'].iloc[-1],
91
+ choices=error['request_month_year_week'].unique().tolist(),
92
+ label="Select a week"
93
+ )
94
+ plot_week_error = gr.BarPlot(
95
+ title="Error Percentage",
96
+ x_title="Tool",
97
+ y_title="Error Percentage",
98
+ render=False
99
+ )
100
+
101
+ def update_tool_plot(selected_tool):
102
+ filtered_data = error[error['tool'] == selected_tool]
103
+ # convert column names to strings
104
+ filtered_data.columns = filtered_data.columns.astype(str)
105
+ # round error_perc to 4 decimal places
106
+ filtered_data['error_perc'] = filtered_data['error_perc'].apply(lambda x: round(x, 4))
107
+ print(filtered_data.head())
108
+ return {
109
+ "x": filtered_data['request_month_year_week'].tolist(),
110
+ "y": filtered_data['error_perc'].tolist(),
111
+ }
112
+
113
+ def update_week_plot(selected_week):
114
+ filtered_data = error[error['request_month_year_week'] == selected_week]
115
+ filtered_data.columns = filtered_data.columns.astype(str)
116
+ filtered_data['error_perc'] = filtered_data['error_perc'].apply(lambda x: round(x, 4))
117
+ print(filtered_data.head())
118
+ return {
119
+ "x": filtered_data['tool'].tolist(),
120
+ "y": filtered_data['error_perc'].tolist(),
121
+ }
122
+
123
+ sel_tool.change(fn=update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
124
+ sel_week.change(fn=update_week_plot, inputs=sel_week, outputs=plot_week_error)
125
+
126
+ with gr.Row():
127
+ plot_tool_error.render()
128
+ with gr.Row():
129
+ plot_week_error.render()
130
+
131
+ with gr.TabItem("ℹ️ About"):
132
+ with gr.Accordion("About the Benchmark", open=False):
133
+ gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
134
+
135
+ demo.queue(default_concurrency_limit=40).launch()
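The `set_error` helper above backfills the error flag: when a row's `error` is neither True nor False, the row counts as an error exactly when it has no `prompt_response`. A tiny illustration of that rule on invented rows (not data from the repo):

```python
import numpy as np
import pandas as pd

def set_error(row):
    # same rule as above: backfill a missing error flag from prompt_response
    if row.error not in [True, False]:
        if not row.prompt_response:
            return True
        return False
    return row.error

rows = pd.DataFrame({
    "error": [True, False, np.nan, np.nan],
    "prompt_response": ["...", "...", None, "..."],
})
print(rows.apply(set_error, axis=1).tolist())  # [True, False, True, False]
```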
app.py ADDED
@@ -0,0 +1,163 @@
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ tools = pd.read_csv("./data/tools.csv")
5
+
6
+ demo = gr.Blocks()
7
+
8
+ INC_TOOLS = [
9
+ 'prediction-online',
10
+ 'prediction-offline',
11
+ 'claude-prediction-online',
12
+ 'claude-prediction-offline',
13
+ 'prediction-offline-sme',
14
+ 'prediction-online-sme',
15
+ 'prediction-request-rag',
16
+ 'prediction-request-reasoning',
17
+ 'prediction-url-cot-claude',
18
+ 'prediction-request-rag-claude',
19
+ 'prediction-request-reasoning-claude'
20
+ ]
21
+
22
+ def set_error(row):
23
+ if row.error not in [True, False]:
24
+ if not row.prompt_response:
25
+ return True
26
+ return False
27
+ return row.error
28
+
29
+ def get_error_data():
30
+ tools_inc = tools[tools['tool'].isin(INC_TOOLS)]
31
+ tools_inc['error'] = tools_inc.apply(set_error, axis=1)
32
+ error = tools_inc.groupby(['tool', 'request_month_year_week', 'error']).size().unstack().fillna(0).reset_index()
33
+ error['error_perc'] = (error[True] / (error[False] + error[True])) * 100
34
+ error['total_requests'] = error[False] + error[True]
35
+ return error
36
+
37
+ def get_error_data_all(error):
38
+ error_total = error.groupby('request_month_year_week').agg({'total_requests': 'sum', False: 'sum', True: 'sum'}).reset_index()
39
+ error_total['error_perc'] = (error_total[True] / error_total['total_requests']) * 100
40
+ error_total.columns = error_total.columns.astype(str)
41
+ error_total['error_perc'] = error_total['error_perc'].apply(lambda x: round(x, 4))
42
+ return error_total
43
+
44
+ error = get_error_data()
45
+ error_all = get_error_data_all(error)
46
+
47
+ with demo:
48
+ gr.HTML("<h1>Olas Predict Actual Performance</h1>")
49
+ gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
50
+
51
+ with gr.Tabs():
52
+ with gr.TabItem("🔥 Error Dashboard"):
53
+ with gr.Row():
54
+ gr.Markdown("# Plot showing overall error")
55
+ with gr.Row():
56
+ # plot
57
+ with gr.Column():
58
+ gr.BarPlot(
59
+ value=error_all,
60
+ x="request_month_year_week",
61
+ y="error_perc",
62
+ title="Error Percentage",
63
+ x_title="Week",
64
+ y_title="Error Percentage",
65
+ height=800,
66
+ show_label=True,
67
+ interactive=True,
68
+ show_actions_button=True,
69
+ tooltip=["request_month_year_week", "error_perc"]
70
+ )
71
+ with gr.Row():
72
+ gr.Markdown("# Plot showing error by tool")
73
+
74
+ with gr.Row():
75
+ sel_tool = gr.Dropdown(label="Select a tool", choices=INC_TOOLS, value=INC_TOOLS[0])
76
+
77
+ with gr.Row():
78
+ plot_tool_error = gr.BarPlot(
79
+ title="Error Percentage",
80
+ x_title="Week",
81
+ y_title="Error Percentage",
82
+ show_label=True,
83
+ interactive=True,
84
+ show_actions_button=True,
85
+ tooltip=["request_month_year_week", "error_perc"],
86
+ width=800
87
+ )
88
+
89
+ with gr.Row():
90
+ gr.Markdown("# Plot showing error by week")
91
+
92
+ with gr.Row():
93
+ choices = error['request_month_year_week'].unique().tolist()
94
+ # sort the choices chronologically; the latest week (last entry) is used as the default
95
+ choices = sorted(choices)
96
+ sel_week = gr.Dropdown(
97
+ label="Select a week",
98
+ choices=choices,
99
+ value=choices[-1]
100
+ )
101
+
102
+ with gr.Row():
103
+ plot_week_error = gr.BarPlot(
104
+ title="Error Percentage",
105
+ x_title="Tool",
106
+ y_title="Error Percentage",
107
+ show_label=True,
108
+ interactive=True,
109
+ show_actions_button=True,
110
+ tooltip=["tool", "error_perc"],
111
+ width=800
112
+ )
113
+
114
+
115
+ def update_tool_plot(selected_tool):
116
+ filtered_data = error[error['tool'] == selected_tool]
117
+ # convert column names to strings
118
+ filtered_data.columns = filtered_data.columns.astype(str)
119
+ # round error_perc to 4 decimal places
120
+ filtered_data['error_perc'] = filtered_data['error_perc'].apply(lambda x: round(x, 4))
121
+ update = gr.LinePlot(
122
+ title="Error Percentage",
123
+ x_title="Week",
124
+ y_title="Error Percentage",
125
+ x="request_month_year_week",
126
+ y="error_perc",
127
+ value=filtered_data
128
+ )
129
+ return update
130
+
131
+ def update_week_plot(selected_week):
132
+ filtered_data = error[error['request_month_year_week'] == selected_week]
133
+ # convert column names to strings
134
+ filtered_data.columns = filtered_data.columns.astype(str)
135
+ # round error_perc to 4 decimal places
136
+ filtered_data['error_perc'] = filtered_data['error_perc'].apply(lambda x: round(x, 4))
137
+ update = gr.BarPlot(
138
+ title="Error Percentage",
139
+ x_title="Tool",
140
+ y_title="Error Percentage",
141
+ x="tool",
142
+ y="error_perc",
143
+ value=filtered_data
144
+ )
145
+ return update
146
+
147
+ sel_tool.change(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
148
+ sel_week.change(update_week_plot, inputs=sel_week, outputs=plot_week_error)
149
+
150
+ with gr.Row():
151
+ sel_tool
152
+ with gr.Row():
153
+ plot_tool_error
154
+ with gr.Row():
155
+ sel_week
156
+ with gr.Row():
157
+ plot_week_error
158
+
159
+ with gr.TabItem("ℹ️ About"):
160
+ with gr.Accordion("About the Benchmark"):
161
+ gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
162
+
163
+ demo.queue(default_concurrency_limit=40).launch()
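A note on the aggregation in `get_error_data` above: requests are counted per (tool, week, error) group, the boolean `error` level is unstacked into `False`/`True` columns, and the weekly error percentage is computed from those counts. A minimal sketch of the same pattern on invented data (column names follow the app; the values are made up):

```python
import pandas as pd

# toy stand-in for data/tools.csv with only the columns the aggregation needs
tools = pd.DataFrame({
    "tool": ["prediction-online"] * 4 + ["prediction-offline"] * 2,
    "request_month_year_week": ["2024-01", "2024-01", "2024-02", "2024-02", "2024-01", "2024-02"],
    "error": [True, False, False, False, True, False],
})

# count requests per (tool, week, error) and spread the error flag into False/True columns
error = (
    tools.groupby(["tool", "request_month_year_week", "error"])
    .size()
    .unstack()
    .fillna(0)
    .reset_index()
)
error["error_perc"] = error[True] / (error[False] + error[True]) * 100
error["total_requests"] = error[False] + error[True]
print(error)
```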
data/all_trades_profitability.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28ee508150a1cba56c9439d0cbfcf4871cb9f32f0792eb1d4dd7bca95af1e903
3
+ size 28328169
data/delivers.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dafbbf73918de11435cbeaee7196ab0f37a18b06656a0c5325b1fa86be98b2c
3
+ size 1121772123
data/fpmmTrades.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a16a49dac94891d4438ea4eba6a52d6ef00f2985bbcc0e41daeb6f8557f5536
3
+ size 62639698
data/fpmms.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7933c45ab45cf377b55dbdc49f413ede81a7582cd843717c70cdd71f8fa7b74
3
+ size 391125
data/requests.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d06d62c1fe5dd50fe5c7e3066413e843eb536cc51f08325fd85570b8255007
3
+ size 124945839
data/summary_profitability.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4769b2c800f4a3c655de8a5673070c5be00ce5733798cc9a745cc5df2f961a6
3
+ size 46612
data/tools.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4045250b8b4ec74ca3d37ce94208665c1ea09042b6681106615f0773ce46aee0
3
+ size 1211219315
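The `data/*.csv` entries above are Git LFS pointer files rather than the datasets themselves: each stores the pointer-spec version, the SHA-256 object id, and the size in bytes of the real file, which `git lfs` fetches on checkout (hence the `*.csv filter=lfs` rule added to `.gitattributes`). A minimal sketch of reading those three fields; the `parse_lfs_pointer` helper is illustrative, not part of this repo:

```python
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Split each 'key value' line of a Git LFS pointer file into a dict (illustrative helper)."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

# e.g. {'version': 'https://git-lfs.github.com/spec/v1', 'oid': 'sha256:...', 'size': '1211219315'}
print(parse_lfs_pointer("data/tools.csv"))
```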
requirements.txt ADDED
@@ -0,0 +1,7 @@
1
+ pandas
2
+ matplotlib
3
+ huggingface-hub
4
+ pyarrow
5
+ web3
6
+ requests
7
+ gradio
scripts/markets.py ADDED
@@ -0,0 +1,225 @@
1
+ # -*- coding: utf-8 -*-
2
+ # ------------------------------------------------------------------------------
3
+ #
4
+ # Copyright 2023 Valory AG
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ # ------------------------------------------------------------------------------
19
+
20
+ import functools
21
+ import warnings
22
+ from string import Template
23
+ from typing import Optional, Generator, Callable
24
+
25
+ import pandas as pd
26
+ import requests
27
+ from tqdm import tqdm
28
+
29
+ from typing import List, Dict
30
+
31
+
32
+ ResponseItemType = List[Dict[str, str]]
33
+ SubgraphResponseType = Dict[str, ResponseItemType]
34
+
35
+
36
+ CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
37
+ BATCH_SIZE = 1000
38
+ OMEN_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
39
+ FPMMS_FIELD = "fixedProductMarketMakers"
40
+ QUERY_FIELD = "query"
41
+ ERROR_FIELD = "errors"
42
+ DATA_FIELD = "data"
43
+ ID_FIELD = "id"
44
+ ANSWER_FIELD = "currentAnswer"
45
+ QUESTION_FIELD = "question"
46
+ OUTCOMES_FIELD = "outcomes"
47
+ TITLE_FIELD = "title"
48
+ MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
49
+ DEFAULT_FILENAME = "fpmms.csv"
50
+
51
+ FPMMS_QUERY = Template(
52
+ """
53
+ {
54
+ ${fpmms_field}(
55
+ where: {
56
+ creator: "${creator}",
57
+ id_gt: "${fpmm_id}",
58
+ isPendingArbitration: false
59
+ },
60
+ orderBy: ${id_field}
61
+ first: ${first}
62
+ ){
63
+ ${id_field}
64
+ ${answer_field}
65
+ ${question_field} {
66
+ ${outcomes_field}
67
+ }
68
+ ${title_field}
69
+ }
70
+ }
71
+ """
72
+ )
73
+
74
+
75
+ class RetriesExceeded(Exception):
76
+ """Exception to raise when retries are exceeded during data-fetching."""
77
+
78
+ def __init__(
79
+ self, msg="Maximum retries were exceeded while trying to fetch the data!"
80
+ ):
81
+ super().__init__(msg)
82
+
83
+
84
+ def hacky_retry(func: Callable, n_retries: int = 3) -> Callable:
85
+ """Create a hacky retry strategy.
86
+ Unfortunately, we cannot use `requests.packages.urllib3.util.retry.Retry`,
87
+ because the subgraph does not return the appropriate status codes in case of failure.
88
+ Instead, it always returns code 200. Thus, we raise exceptions manually inside `make_request`,
89
+ catch those exceptions in the hacky retry decorator and try again.
90
+ Finally, if the allowed number of retries is exceeded, we raise a custom `RetriesExceeded` exception.
91
+
92
+ :param func: the input request function.
93
+ :param n_retries: the maximum allowed number of retries.
94
+ :return: The request method with the hacky retry strategy applied.
95
+ """
96
+
97
+ @functools.wraps(func)
98
+ def wrapper_hacky_retry(*args, **kwargs) -> SubgraphResponseType:
99
+ """The wrapper for the hacky retry.
100
+
101
+ :return: a response dictionary.
102
+ """
103
+ retried = 0
104
+
105
+ while retried <= n_retries:
106
+ try:
107
+ if retried > 0:
108
+ warnings.warn(f"Retrying {retried}/{n_retries}...")
109
+
110
+ return func(*args, **kwargs)
111
+ except (ValueError, ConnectionError) as e:
112
+ warnings.warn(e.args[0])
113
+ finally:
114
+ retried += 1
115
+
116
+ raise RetriesExceeded()
117
+
118
+ return wrapper_hacky_retry
119
+
120
+
121
+ @hacky_retry
122
+ def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
123
+ """Query a subgraph.
124
+
125
+ Args:
126
+ url: the subgraph's URL.
127
+ query: the query to be used.
128
+ key: the key to use in order to access the required data.
129
+
130
+ Returns:
131
+ a response dictionary.
132
+ """
133
+ content = {QUERY_FIELD: query}
134
+ headers = {
135
+ "Accept": "application/json",
136
+ "Content-Type": "application/json",
137
+ }
138
+ res = requests.post(url, json=content, headers=headers)
139
+
140
+ if res.status_code != 200:
141
+ raise ConnectionError(
142
+ "Something went wrong while trying to communicate with the subgraph "
143
+ f"(Error: {res.status_code})!\n{res.text}"
144
+ )
145
+
146
+ body = res.json()
147
+ if ERROR_FIELD in body.keys():
148
+ raise ValueError(f"The given query is not correct: {body[ERROR_FIELD]}")
149
+
150
+ data = body.get(DATA_FIELD, {}).get(key, None)
151
+ if data is None:
152
+ raise ValueError(f"Unknown error encountered!\nRaw response: \n{body}")
153
+
154
+ return data
155
+
156
+
157
+ def fpmms_fetcher() -> Generator[ResponseItemType, int, None]:
158
+ """An indefinite fetcher for the FPMMs."""
159
+ while True:
160
+ fpmm_id = yield
161
+ fpmms_query = FPMMS_QUERY.substitute(
162
+ creator=CREATOR,
163
+ fpmm_id=fpmm_id,
164
+ fpmms_field=FPMMS_FIELD,
165
+ first=BATCH_SIZE,
166
+ id_field=ID_FIELD,
167
+ answer_field=ANSWER_FIELD,
168
+ question_field=QUESTION_FIELD,
169
+ outcomes_field=OUTCOMES_FIELD,
170
+ title_field=TITLE_FIELD,
171
+ )
172
+ yield query_subgraph(OMEN_SUBGRAPH, fpmms_query, FPMMS_FIELD)
173
+
174
+
175
+ def fetch_fpmms() -> pd.DataFrame:
176
+ """Fetch all the fpmms of the creator."""
177
+ latest_id = ""
178
+ fpmms = []
179
+ fetcher = fpmms_fetcher()
180
+ for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
181
+ batch = fetcher.send(latest_id)
182
+ if len(batch) == 0:
183
+ break
184
+
185
+ latest_id = batch[-1].get(ID_FIELD, "")
186
+ if latest_id == "":
187
+ raise ValueError(f"Unexpected data format retrieved: {batch}")
188
+
189
+ fpmms.extend(batch)
190
+
191
+ return pd.DataFrame(fpmms)
192
+
193
+
194
+ def get_answer(fpmm: pd.Series) -> str:
195
+ """Get an answer from its index, using Series of an FPMM."""
196
+ return fpmm[QUESTION_FIELD][OUTCOMES_FIELD][fpmm[ANSWER_FIELD]]
197
+
198
+
199
+ def transform_fpmms(fpmms: pd.DataFrame) -> pd.DataFrame:
200
+ """Transform an FPMMS dataframe."""
201
+ transformed = fpmms.dropna()
202
+ transformed = transformed.drop_duplicates([ID_FIELD])
203
+ transformed = transformed.loc[transformed[ANSWER_FIELD] != MAX_UINT_HEX]
204
+ transformed.loc[:, ANSWER_FIELD] = (
205
+ transformed[ANSWER_FIELD].str.slice(-1).astype(int)
206
+ )
207
+ transformed.loc[:, ANSWER_FIELD] = transformed.apply(get_answer, axis=1)
208
+ transformed = transformed.drop(columns=[QUESTION_FIELD])
209
+
210
+ return transformed
211
+
212
+
213
+ def etl(filename: Optional[str] = None) -> pd.DataFrame:
214
+ """Fetch, process, store and return the markets as a Dataframe."""
215
+ fpmms = fetch_fpmms()
216
+ fpmms = transform_fpmms(fpmms)
217
+
218
+ if filename:
219
+ fpmms.to_csv(filename, index=False)
220
+
221
+ return fpmms
222
+
223
+
224
+ if __name__ == "__main__":
225
+ etl(DEFAULT_FILENAME)
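`fetch_fpmms` above drives `fpmms_fetcher` with the two-step generator protocol: each `for` iteration advances the generator to the bare `fpmm_id = yield`, and the following `fetcher.send(latest_id)` delivers the pagination cursor and runs it on to the `yield query_subgraph(...)` that returns the batch. A stripped-down sketch of the same pattern with a fake data source (the `fake_batch` helper is invented for illustration):

```python
from typing import Dict, Generator, List

def fake_batch(after_id: str) -> List[Dict[str, str]]:
    """Stand-in for query_subgraph: return up to two items with ids greater than `after_id`."""
    items = [{"id": f"0x{i:02d}"} for i in range(5)]
    return [item for item in items if item["id"] > after_id][:2]

def fetcher() -> Generator[List[Dict[str, str]], str, None]:
    """Indefinite fetcher mirroring fpmms_fetcher: receive a cursor, then yield a batch."""
    while True:
        after_id = yield              # paused here by the `for` loop
        yield fake_batch(after_id)    # resumed by .send(after_id)

latest_id, results = "", []
gen = fetcher()
for _ in gen:                          # advances to `after_id = yield`
    batch = gen.send(latest_id)        # sends the cursor, receives the next batch
    if not batch:
        break
    latest_id = batch[-1]["id"]
    results.extend(batch)
print(results)  # five items, fetched two at a time
```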
scripts/profitability.py ADDED
@@ -0,0 +1,631 @@
1
+ # -*- coding: utf-8 -*-
2
+ # ------------------------------------------------------------------------------
3
+ #
4
+ # Copyright 2023 Valory AG
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ # ------------------------------------------------------------------------------
19
+
20
+ import time
21
+ import requests
22
+ import datetime
23
+ import pandas as pd
24
+ from collections import defaultdict
25
+ from typing import Any, Union
26
+ from string import Template
27
+ from enum import Enum
28
+ from tqdm import tqdm
29
+ import numpy as np
30
+
31
+
32
+ IRRELEVANT_TOOLS = [
33
+ "openai-text-davinci-002",
34
+ "openai-text-davinci-003",
35
+ "openai-gpt-3.5-turbo",
36
+ "openai-gpt-4",
37
+ "stabilityai-stable-diffusion-v1-5",
38
+ "stabilityai-stable-diffusion-xl-beta-v2-2-2",
39
+ "stabilityai-stable-diffusion-512-v2-1",
40
+ "stabilityai-stable-diffusion-768-v2-1",
41
+ "deepmind-optimization-strong",
42
+ "deepmind-optimization",
43
+ ]
44
+ QUERY_BATCH_SIZE = 1000
45
+ DUST_THRESHOLD = 10000000000000
46
+ INVALID_ANSWER_HEX = (
47
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
48
+ )
49
+ INVALID_ANSWER = -1
50
+ FPMM_CREATOR = "0x89c5cc945dd550bcffb72fe42bff002429f46fec"
51
+ DEFAULT_FROM_DATE = "1970-01-01T00:00:00"
52
+ DEFAULT_TO_DATE = "2038-01-19T03:14:07"
53
+ DEFAULT_FROM_TIMESTAMP = 0
54
+ DEFAULT_TO_TIMESTAMP = 2147483647
55
+ WXDAI_CONTRACT_ADDRESS = "0xe91D153E0b41518A2Ce8Dd3D7944Fa863463a97d"
56
+ DEFAULT_MECH_FEE = 0.01
57
+ DUST_THRESHOLD = 10000000000000
58
+
59
+
60
+ class MarketState(Enum):
61
+ """Market state"""
62
+
63
+ OPEN = 1
64
+ PENDING = 2
65
+ FINALIZING = 3
66
+ ARBITRATING = 4
67
+ CLOSED = 5
68
+
69
+ def __str__(self) -> str:
70
+ """Prints the market status."""
71
+ return self.name.capitalize()
72
+
73
+
74
+ class MarketAttribute(Enum):
75
+ """Attribute"""
76
+
77
+ NUM_TRADES = "Num_trades"
78
+ WINNER_TRADES = "Winner_trades"
79
+ NUM_REDEEMED = "Num_redeemed"
80
+ INVESTMENT = "Investment"
81
+ FEES = "Fees"
82
+ MECH_CALLS = "Mech_calls"
83
+ MECH_FEES = "Mech_fees"
84
+ EARNINGS = "Earnings"
85
+ NET_EARNINGS = "Net_earnings"
86
+ REDEMPTIONS = "Redemptions"
87
+ ROI = "ROI"
88
+
89
+ def __str__(self) -> str:
90
+ """Prints the attribute."""
91
+ return self.value
92
+
93
+ def __repr__(self) -> str:
94
+ """Prints the attribute representation."""
95
+ return self.name
96
+
97
+ @staticmethod
98
+ def argparse(s: str) -> "MarketAttribute":
99
+ """Performs string conversion to MarketAttribute."""
100
+ try:
101
+ return MarketAttribute[s.upper()]
102
+ except KeyError as e:
103
+ raise ValueError(f"Invalid MarketAttribute: {s}") from e
104
+
105
+
106
+ ALL_TRADES_STATS_DF_COLS = [
107
+ "trader_address",
108
+ "trade_id",
109
+ "creation_timestamp",
110
+ "title",
111
+ "market_status",
112
+ "collateral_amount",
113
+ "outcome_index",
114
+ "trade_fee_amount",
115
+ "outcomes_tokens_traded",
116
+ "current_answer",
117
+ "is_invalid",
118
+ "winning_trade",
119
+ "earnings",
120
+ "redeemed",
121
+ "redeemed_amount",
122
+ "num_mech_calls",
123
+ "mech_fee_amount",
124
+ "net_earnings",
125
+ "roi",
126
+ ]
127
+
128
+ SUMMARY_STATS_DF_COLS = [
129
+ "trader_address",
130
+ "num_trades",
131
+ "num_winning_trades",
132
+ "num_redeemed",
133
+ "total_investment",
134
+ "total_trade_fees",
135
+ "num_mech_calls",
136
+ "total_mech_fees",
137
+ "total_earnings",
138
+ "total_redeemed_amount",
139
+ "total_net_earnings",
140
+ "total_net_earnings_wo_mech_fees",
141
+ "total_roi",
142
+ "total_roi_wo_mech_fees",
143
+ "mean_mech_calls_per_trade",
144
+ "mean_mech_fee_amount_per_trade",
145
+ ]
146
+ headers = {
147
+ "Accept": "application/json, multipart/mixed",
148
+ "Content-Type": "application/json",
149
+ }
150
+
151
+
152
+ omen_xdai_trades_query = Template(
153
+ """
154
+ {
155
+ fpmmTrades(
156
+ where: {
157
+ type: Buy,
158
+ fpmm_: {
159
+ creator: "${fpmm_creator}"
160
+ creationTimestamp_gte: "${fpmm_creationTimestamp_gte}",
161
+ creationTimestamp_lt: "${fpmm_creationTimestamp_lte}"
162
+ },
163
+ creationTimestamp_gte: "${creationTimestamp_gte}",
164
+ creationTimestamp_lte: "${creationTimestamp_lte}"
165
+ id_gt: "${id_gt}"
166
+ }
167
+ first: ${first}
168
+ orderBy: id
169
+ orderDirection: asc
170
+ ) {
171
+ id
172
+ title
173
+ collateralToken
174
+ outcomeTokenMarginalPrice
175
+ oldOutcomeTokenMarginalPrice
176
+ type
177
+ creator {
178
+ id
179
+ }
180
+ creationTimestamp
181
+ collateralAmount
182
+ collateralAmountUSD
183
+ feeAmount
184
+ outcomeIndex
185
+ outcomeTokensTraded
186
+ transactionHash
187
+ fpmm {
188
+ id
189
+ outcomes
190
+ title
191
+ answerFinalizedTimestamp
192
+ currentAnswer
193
+ isPendingArbitration
194
+ arbitrationOccurred
195
+ openingTimestamp
196
+ condition {
197
+ id
198
+ }
199
+ }
200
+ }
201
+ }
202
+ """
203
+ )
204
+
205
+
206
+ conditional_tokens_gc_user_query = Template(
207
+ """
208
+ {
209
+ user(id: "${id}") {
210
+ userPositions(
211
+ first: ${first}
212
+ where: {
213
+ id_gt: "${userPositions_id_gt}"
214
+ }
215
+ orderBy: id
216
+ ) {
217
+ balance
218
+ id
219
+ position {
220
+ id
221
+ conditionIds
222
+ }
223
+ totalBalance
224
+ wrappedBalance
225
+ }
226
+ }
227
+ }
228
+ """
229
+ )
230
+
231
+
232
+ def _to_content(q: str) -> dict[str, Any]:
233
+ """Convert the given query string to payload content, i.e., add it under a `queries` key and convert it to bytes."""
234
+ finalized_query = {
235
+ "query": q,
236
+ "variables": None,
237
+ "extensions": {"headers": None},
238
+ }
239
+ return finalized_query
240
+
241
+
242
+ def _query_omen_xdai_subgraph(
243
+ from_timestamp: float,
244
+ to_timestamp: float,
245
+ fpmm_from_timestamp: float,
246
+ fpmm_to_timestamp: float,
247
+ ) -> dict[str, Any]:
248
+ """Query the subgraph."""
249
+ url = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
250
+
251
+ grouped_results = defaultdict(list)
252
+ id_gt = ""
253
+
254
+ while True:
255
+ query = omen_xdai_trades_query.substitute(
256
+ fpmm_creator=FPMM_CREATOR.lower(),
257
+ creationTimestamp_gte=int(from_timestamp),
258
+ creationTimestamp_lte=int(to_timestamp),
259
+ fpmm_creationTimestamp_gte=int(fpmm_from_timestamp),
260
+ fpmm_creationTimestamp_lte=int(fpmm_to_timestamp),
261
+ first=QUERY_BATCH_SIZE,
262
+ id_gt=id_gt,
263
+ )
264
+ content_json = _to_content(query)
265
+ res = requests.post(url, headers=headers, json=content_json)
266
+ result_json = res.json()
267
+ user_trades = result_json.get("data", {}).get("fpmmTrades", [])
268
+
269
+ if not user_trades:
270
+ break
271
+
272
+ for trade in user_trades:
273
+ fpmm_id = trade.get("fpmm", {}).get("id")
274
+ grouped_results[fpmm_id].append(trade)
275
+
276
+ id_gt = user_trades[len(user_trades) - 1]["id"]
277
+
278
+ all_results = {
279
+ "data": {
280
+ "fpmmTrades": [
281
+ trade
282
+ for trades_list in grouped_results.values()
283
+ for trade in trades_list
284
+ ]
285
+ }
286
+ }
287
+
288
+ return all_results
289
+
290
+
291
+ def _query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
292
+ """Query the subgraph."""
293
+ url = "https://api.thegraph.com/subgraphs/name/gnosis/conditional-tokens-gc"
294
+
295
+ all_results: dict[str, Any] = {"data": {"user": {"userPositions": []}}}
296
+ userPositions_id_gt = ""
297
+ while True:
298
+ query = conditional_tokens_gc_user_query.substitute(
299
+ id=creator.lower(),
300
+ first=QUERY_BATCH_SIZE,
301
+ userPositions_id_gt=userPositions_id_gt,
302
+ )
303
+ content_json = {"query": query}
304
+ res = requests.post(url, headers=headers, json=content_json)
305
+ result_json = res.json()
306
+ user_data = result_json.get("data", {}).get("user", {})
307
+
308
+ if not user_data:
309
+ break
310
+
311
+ user_positions = user_data.get("userPositions", [])
312
+
313
+ if user_positions:
314
+ all_results["data"]["user"]["userPositions"].extend(user_positions)
315
+ userPositions_id_gt = user_positions[len(user_positions) - 1]["id"]
316
+ else:
317
+ break
318
+
319
+ if len(all_results["data"]["user"]["userPositions"]) == 0:
320
+ return {"data": {"user": None}}
321
+
322
+ return all_results
323
+
324
+
325
+ def convert_hex_to_int(x: Union[str, float]) -> Union[int, float]:
326
+ """Convert hex to int"""
327
+ if isinstance(x, float):
328
+ return np.nan
329
+ elif isinstance(x, str):
330
+ if x == INVALID_ANSWER_HEX:
331
+ return -1
332
+ else:
333
+ return int(x, 16)
334
+
335
+
336
+ def wei_to_unit(wei: int) -> float:
337
+ """Converts wei to currency unit."""
338
+ return wei / 10**18
339
+
340
+
341
+ def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
342
+ """Returns whether the user has redeemed the position."""
343
+ user_positions = user_json["data"]["user"]["userPositions"]
344
+ outcomes_tokens_traded = int(fpmmTrade["outcomeTokensTraded"])
345
+ condition_id = fpmmTrade["fpmm.condition.id"]
346
+
347
+ for position in user_positions:
348
+ position_condition_ids = position["position"]["conditionIds"]
349
+ balance = int(position["balance"])
350
+
351
+ if condition_id in position_condition_ids:
352
+ if balance == 0:
353
+ return True
354
+ # return early
355
+ return False
356
+ return False
357
+
358
+
359
+ def create_fpmmTrades(rpc: str):
360
+ """Create fpmmTrades for all trades."""
361
+ trades_json = _query_omen_xdai_subgraph(
362
+ from_timestamp=DEFAULT_FROM_TIMESTAMP,
363
+ to_timestamp=DEFAULT_TO_TIMESTAMP,
364
+ fpmm_from_timestamp=DEFAULT_FROM_TIMESTAMP,
365
+ fpmm_to_timestamp=DEFAULT_TO_TIMESTAMP,
366
+ )
367
+
368
+ # convert to dataframe
369
+ df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
370
+
371
+ # convert creator to address
372
+ df["creator"] = df["creator"].apply(lambda x: x["id"])
373
+
374
+ # normalize fpmm column
375
+ fpmm = pd.json_normalize(df["fpmm"])
376
+ fpmm.columns = [f"fpmm.{col}" for col in fpmm.columns]
377
+ df = pd.concat([df, fpmm], axis=1)
378
+
379
+ # drop fpmm column
380
+ df.drop(["fpmm"], axis=1, inplace=True)
381
+
382
+ # change creator to creator_address
383
+ df.rename(columns={"creator": "trader_address"}, inplace=True)
384
+
385
+ # save to csv
386
+ df.to_csv("fpmmTrades.csv", index=False)
387
+
388
+ return df
389
+
390
+
391
+ def prepare_profitalibity_data(rpc: str):
392
+ """Prepare data for profitalibity analysis."""
393
+
394
+ # Check if tools.py is in the same directory
395
+ try:
396
+ # load tools.csv
397
+ tools = pd.read_csv("tools.csv")
398
+
399
+ # make sure creator_address is in the columns
400
+ assert "trader_address" in tools.columns, "trader_address column not found"
401
+
402
+ # lowercase and strip creator_address
403
+ tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
404
+
405
+ # drop duplicates
406
+ tools.drop_duplicates(inplace=True)
407
+
408
+ print("tools.csv loaded")
409
+ except FileNotFoundError:
410
+ print("tools.csv not found. Please run tools.py first.")
411
+ return
412
+
413
+ # Check if fpmmTrades.csv is in the same directory
414
+ try:
415
+ # load fpmmTrades.csv
416
+ fpmmTrades = pd.read_csv("fpmmTrades.csv")
417
+ print("fpmmTrades.csv loaded")
418
+ except FileNotFoundError:
419
+ print("fpmmTrades.csv not found. Creating fpmmTrades.csv...")
420
+ fpmmTrades = create_fpmmTrades(rpc)
421
+ fpmmTrades.to_csv("fpmmTrades.csv", index=False)
422
+ fpmmTrades = pd.read_csv("fpmmTrades.csv")
423
+
424
+ # make sure trader_address is in the columns
425
+ assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
426
+
427
+ # lowercase and strip creator_address
428
+ fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
429
+
430
+ return fpmmTrades, tools
431
+
432
+
433
+ def determine_market_status(trade, current_answer):
434
+ """Determine the market status of a trade."""
435
+ if current_answer is np.nan and time.time() >= trade["fpmm.openingTimestamp"]:
436
+ return MarketState.PENDING
437
+ elif current_answer is np.nan:  # `==` never matches NaN; mirror the check above
438
+ return MarketState.OPEN
439
+ elif trade["fpmm.isPendingArbitration"]:
440
+ return MarketState.ARBITRATING
441
+ elif time.time() < trade["fpmm.answerFinalizedTimestamp"]:
442
+ return MarketState.FINALIZING
443
+ return MarketState.CLOSED
444
+
445
+
446
+ def analyse_trader(
447
+ trader_address: str, fpmmTrades: pd.DataFrame, tools: pd.DataFrame
448
+ ) -> pd.DataFrame:
449
+ """Analyse a trader's trades"""
450
+ # Filter trades and tools for the given trader
451
+ trades = fpmmTrades[fpmmTrades["trader_address"] == trader_address]
452
+ tools_usage = tools[tools["trader_address"] == trader_address]
453
+
454
+ # Prepare the DataFrame
455
+ trades_df = pd.DataFrame(columns=ALL_TRADES_STATS_DF_COLS)
456
+ if trades.empty:
457
+ return trades_df
458
+
459
+ # Fetch user's conditional tokens gc graph
460
+ try:
461
+ user_json = _query_conditional_tokens_gc_subgraph(trader_address)
462
+ except Exception as e:
463
+ print(f"Error fetching user data: {e}")
464
+ return trades_df
465
+
466
+ # Iterate over the trades
467
+ for i, trade in tqdm(trades.iterrows(), total=len(trades), desc="Analysing trades"):
468
+ try:
469
+ # Parsing and computing shared values
470
+ creation_timestamp_utc = datetime.datetime.fromtimestamp(
471
+ trade["creationTimestamp"], tz=datetime.timezone.utc
472
+ )
473
+ collateral_amount = wei_to_unit(float(trade["collateralAmount"]))
474
+ fee_amount = wei_to_unit(float(trade["feeAmount"]))
475
+ outcome_tokens_traded = wei_to_unit(float(trade["outcomeTokensTraded"]))
476
+ earnings, winner_trade = (0, False)
477
+ redemption = _is_redeemed(user_json, trade)
478
+ current_answer = trade["fpmm.currentAnswer"]
479
+
480
+ # Determine market status
481
+ market_status = determine_market_status(trade, current_answer)
482
+
483
+ # Skip non-closed markets
484
+ if market_status != MarketState.CLOSED:
485
+ print(
486
+ f"Skipping trade {i} because market is not closed. Market Status: {market_status}"
487
+ )
488
+ continue
489
+ current_answer = convert_hex_to_int(current_answer)
490
+
491
+ # Compute invalidity
492
+ is_invalid = current_answer == INVALID_ANSWER
493
+
494
+ # Compute earnings and winner trade status
495
+ if is_invalid:
496
+ earnings = collateral_amount
497
+ winner_trade = False
498
+ elif trade["outcomeIndex"] == current_answer:
499
+ earnings = outcome_tokens_traded
500
+ winner_trade = True
501
+
502
+ # Compute mech calls
503
+ num_mech_calls = (
504
+ tools_usage["prompt_request"].apply(lambda x: trade["title"] in x).sum()
505
+ )
506
+ net_earnings = (
507
+ earnings
508
+ - fee_amount
509
+ - (num_mech_calls * DEFAULT_MECH_FEE)
510
+ - collateral_amount
511
+ )
512
+
513
+ # Assign values to DataFrame
514
+ trades_df.loc[i] = {
515
+ "trader_address": trader_address,
516
+ "trade_id": trade["id"],
517
+ "market_status": market_status.name,
518
+ "creation_timestamp": creation_timestamp_utc,
519
+ "title": trade["title"],
520
+ "collateral_amount": collateral_amount,
521
+ "outcome_index": trade["outcomeIndex"],
522
+ "trade_fee_amount": fee_amount,
523
+ "outcomes_tokens_traded": outcome_tokens_traded,
524
+ "current_answer": current_answer,
525
+ "is_invalid": is_invalid,
526
+ "winning_trade": winner_trade,
527
+ "earnings": earnings,
528
+ "redeemed": redemption,
529
+ "redeemed_amount": earnings if redemption else 0,
530
+ "num_mech_calls": num_mech_calls,
531
+ "mech_fee_amount": num_mech_calls * DEFAULT_MECH_FEE,
532
+ "net_earnings": net_earnings,
533
+ "roi": net_earnings / collateral_amount,
534
+ }
535
+
536
+ except Exception as e:
537
+ print(f"Error processing trade {i}: {e}")
538
+ continue
539
+
540
+ return trades_df
541
+
542
+
543
+ def analyse_all_traders(trades: pd.DataFrame, tools: pd.DataFrame) -> pd.DataFrame:
544
+ """Analyse all creators."""
545
+ all_traders = []
546
+ for trader in tqdm(
547
+ trades["trader_address"].unique(),
548
+ total=len(trades["trader_address"].unique()),
549
+ desc="Analysing creators",
550
+ ):
551
+ all_traders.append(analyse_trader(trader, trades, tools))
552
+
553
+ # concat all creators
554
+ all_creators_df = pd.concat(all_traders)
555
+
556
+ return all_creators_df
557
+
558
+
559
+ def summary_analyse(df):
560
+ """Summarise profitability analysis."""
561
+ # Ensure DataFrame is not empty
562
+ if df.empty:
563
+ return pd.DataFrame(columns=SUMMARY_STATS_DF_COLS)
564
+
565
+ # Group by trader_address
566
+ grouped = df.groupby("trader_address")
567
+
568
+ # Create summary DataFrame
569
+ summary_df = grouped.agg(
570
+ num_trades=("trader_address", "size"),
571
+ num_winning_trades=("winning_trade", lambda x: float((x).sum())),
572
+ num_redeemed=("redeemed", lambda x: float(x.sum())),
573
+ total_investment=("collateral_amount", "sum"),
574
+ total_trade_fees=("trade_fee_amount", "sum"),
575
+ num_mech_calls=("num_mech_calls", "sum"),
576
+ total_mech_fees=("mech_fee_amount", "sum"),
577
+ total_earnings=("earnings", "sum"),
578
+ total_redeemed_amount=("redeemed_amount", "sum"),
579
+ total_net_earnings=("net_earnings", "sum"),
580
+ )
581
+
582
+ # Calculating additional columns
583
+ summary_df["total_roi"] = (
584
+ summary_df["total_net_earnings"] / summary_df["total_investment"]
585
+ )
586
+ summary_df["mean_mech_calls_per_trade"] = (
587
+ summary_df["num_mech_calls"] / summary_df["num_trades"]
588
+ )
589
+ summary_df["mean_mech_fee_amount_per_trade"] = (
590
+ summary_df["total_mech_fees"] / summary_df["num_trades"]
591
+ )
592
+ summary_df["total_net_earnings_wo_mech_fees"] = (
593
+ summary_df["total_net_earnings"] + summary_df["total_mech_fees"]
594
+ )
595
+ summary_df["total_roi_wo_mech_fees"] = (
596
+ summary_df["total_net_earnings_wo_mech_fees"] / summary_df["total_investment"]
597
+ )
598
+
599
+ # Resetting index to include trader_address
600
+ summary_df.reset_index(inplace=True)
601
+
602
+ return summary_df
603
+
604
+
605
+ def run_profitability_analysis(rpc):
606
+ """Create all trades analysis."""
607
+
608
+ # load dfs from csv for analysis
609
+ print("Preparing data...")
610
+ fpmmTrades, tools = prepare_profitalibity_data(rpc)
611
+
612
+ # all trades profitability df
613
+ print("Analysing trades...")
614
+ all_trades_df = analyse_all_traders(fpmmTrades, tools)
615
+
616
+ # summarize profitability df
617
+ print("Summarising trades...")
618
+ summary_df = summary_analyse(all_trades_df)
619
+
620
+ # save to csv
621
+ all_trades_df.to_csv("all_trades_profitability.csv", index=False)
622
+ summary_df.to_csv("summary_profitability.csv", index=False)
623
+
624
+ print("Done!")
625
+
626
+ return all_trades_df, summary_df
627
+
628
+
629
+ if __name__ == "__main__":
630
+ rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
631
+ run_profitability_analysis(rpc)
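For a single closed market, `analyse_trader` above nets a trade out as `net_earnings = earnings - trade_fee - num_mech_calls * DEFAULT_MECH_FEE - collateral_amount` and `roi = net_earnings / collateral_amount`. A small worked example with invented numbers for a winning trade (where earnings are the outcome tokens received):

```python
DEFAULT_MECH_FEE = 0.01  # xDAI per mech call, as in the script

# invented trade: 1.0 xDAI collateral buys 1.9 outcome tokens, 0.02 xDAI market fee, 2 mech calls
collateral_amount = 1.0
trade_fee = 0.02
outcome_tokens_traded = 1.9
num_mech_calls = 2

earnings = outcome_tokens_traded  # winning trade: tokens redeem 1:1 against the collateral
net_earnings = earnings - trade_fee - num_mech_calls * DEFAULT_MECH_FEE - collateral_amount
roi = net_earnings / collateral_amount
print(round(net_earnings, 2), round(roi, 2))  # 0.86 0.86
```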
scripts/tools.py ADDED
@@ -0,0 +1,761 @@
1
+ # -*- coding: utf-8 -*-
2
+ # ------------------------------------------------------------------------------
3
+ #
4
+ # Copyright 2023 Valory AG
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ # ------------------------------------------------------------------------------
19
+
20
+ import json
21
+ import os.path
22
+ import re
23
+ import sys
24
+ import time
25
+ import random
26
+ from dataclasses import dataclass
27
+ from enum import Enum
28
+ from io import StringIO
29
+ from typing import (
30
+ Optional,
31
+ List,
32
+ Dict,
33
+ Any,
34
+ Union,
35
+ Callable,
36
+ Tuple,
37
+ )
38
+
39
+ import pandas as pd
40
+ import requests
41
+ from json.decoder import JSONDecodeError
42
+ from eth_typing import ChecksumAddress
43
+ from eth_utils import to_checksum_address
44
+ from requests.adapters import HTTPAdapter
45
+ from requests.exceptions import (
46
+ ReadTimeout as RequestsReadTimeoutError,
47
+ HTTPError as RequestsHTTPError,
48
+ )
49
+ from tqdm import tqdm
50
+ from urllib3 import Retry
51
+ from urllib3.exceptions import (
52
+ ReadTimeoutError as Urllib3ReadTimeoutError,
53
+ HTTPError as Urllib3HTTPError,
54
+ )
55
+ from web3 import Web3, HTTPProvider
56
+ from web3.exceptions import MismatchedABI
57
+ from web3.types import BlockParams
58
+ from concurrent.futures import ThreadPoolExecutor, as_completed
59
+
60
+ CONTRACTS_PATH = "contracts"
61
+ MECH_TO_INFO = {
62
+ # this block number is when the creator had its first tx ever, and after this mech's creation
63
+ "0xff82123dfb52ab75c417195c5fdb87630145ae81": ("old_mech_abi.json", 28911547),
64
+ # this block number is when this mech was created
65
+ "0x77af31de935740567cf4ff1986d04b2c964a786a": ("new_mech_abi.json", 30776879),
66
+ }
67
+ # optionally set the latest block to stop searching for the delivered events
68
+ LATEST_BLOCK: Optional[int] = None
69
+ LATEST_BLOCK_NAME: BlockParams = "latest"
70
+ BLOCK_DATA_NUMBER = "number"
71
+ BLOCKS_CHUNK_SIZE = 10_000
72
+ REDUCE_FACTOR = 0.25
73
+ EVENT_ARGUMENTS = "args"
74
+ DATA = "data"
75
+ REQUEST_ID = "requestId"
76
+ REQUEST_ID_FIELD = "request_id"
77
+ REQUEST_SENDER = "sender"
78
+ PROMPT_FIELD = "prompt"
79
+ BLOCK_FIELD = "block"
80
+ CID_PREFIX = "f01701220"
81
+ HTTP = "http://"
82
+ HTTPS = HTTP[:4] + "s" + HTTP[4:]
83
+ IPFS_ADDRESS = f"{HTTPS}gateway.autonolas.tech/ipfs/"
84
+ IPFS_LINKS_SERIES_NAME = "ipfs_links"
85
+ BACKOFF_FACTOR = 1
86
+ STATUS_FORCELIST = [404, 500, 502, 503, 504]
87
+ DEFAULT_FILENAME = "tools.csv"
88
+ RE_RPC_FILTER_ERROR = r"Filter with id: '\d+' does not exist."
89
+ ABI_ERROR = "The event signature did not match the provided ABI"
90
+ SLEEP = 0.5
91
+ HTTP_TIMEOUT = 10
92
+ N_IPFS_RETRIES = 1
93
+ N_RPC_RETRIES = 100
94
+ RPC_POLL_INTERVAL = 0.05
95
+ IPFS_POLL_INTERVAL = 0.05
96
+ FORMAT_UPDATE_BLOCK_NUMBER = 30411638
97
+ IRRELEVANT_TOOLS = [
98
+ "openai-text-davinci-002",
99
+ "openai-text-davinci-003",
100
+ "openai-gpt-3.5-turbo",
101
+ "openai-gpt-4",
102
+ "stabilityai-stable-diffusion-v1-5",
103
+ "stabilityai-stable-diffusion-xl-beta-v2-2-2",
104
+ "stabilityai-stable-diffusion-512-v2-1",
105
+ "stabilityai-stable-diffusion-768-v2-1",
106
+ "deepmind-optimization-strong",
107
+ "deepmind-optimization",
108
+ ]
109
+ # this is how frequently we will keep a snapshot of the progress so far in terms of blocks' batches
110
+ # for example, the value 1 means that for every `BLOCKS_CHUNK_SIZE` blocks that we search, we also store the snapshot
111
+ SNAPSHOT_RATE = 10
112
+ NUM_WORKERS = 10
113
+ GET_CONTENTS_BATCH_SIZE = 1000
114
+
115
+
116
+ class MechEventName(Enum):
117
+ """The mech's event names."""
118
+
119
+ REQUEST = "Request"
120
+ DELIVER = "Deliver"
121
+
122
+
123
+ @dataclass
124
+ class MechEvent:
125
+ """A mech's on-chain event representation."""
126
+
127
+ for_block: int
128
+ requestId: int
129
+ data: bytes
130
+ sender: str
131
+
132
+ def _ipfs_link(self) -> Optional[str]:
133
+ """Get the ipfs link for the data."""
134
+ return f"{IPFS_ADDRESS}{CID_PREFIX}{self.data.hex()}"
135
+
136
+ @property
137
+ def ipfs_request_link(self) -> Optional[str]:
138
+ """Get the IPFS link for the request."""
139
+ return f"{self._ipfs_link()}/metadata.json"
140
+
141
+ @property
142
+ def ipfs_deliver_link(self) -> Optional[str]:
143
+ """Get the IPFS link for the deliver."""
144
+ if self.requestId is None:
145
+ return None
146
+ return f"{self._ipfs_link()}/{self.requestId}"
147
+
148
+ def ipfs_link(self, event_name: MechEventName) -> Optional[str]:
149
+ """Get the ipfs link based on the event."""
150
+ if event_name == MechEventName.REQUEST:
151
+ if self.for_block < FORMAT_UPDATE_BLOCK_NUMBER:
152
+ return self._ipfs_link()
153
+ return self.ipfs_request_link
154
+ if event_name == MechEventName.DELIVER:
155
+ return self.ipfs_deliver_link
156
+ return None
157
+
158
+
159
+ @dataclass(init=False)
160
+ class MechRequest:
161
+ """A structure for a request to a mech."""
162
+
163
+ request_id: Optional[int]
164
+ request_block: Optional[int]
165
+ prompt_request: Optional[str]
166
+ tool: Optional[str]
167
+ nonce: Optional[str]
168
+ trader_address: Optional[str]
169
+
170
+ def __init__(self, **kwargs: Any) -> None:
171
+ """Initialize the request ignoring extra keys."""
172
+ self.request_id = int(kwargs.pop(REQUEST_ID, 0))
173
+ self.request_block = int(kwargs.pop(BLOCK_FIELD, 0))
174
+ self.prompt_request = kwargs.pop(PROMPT_FIELD, None)
175
+ self.tool = kwargs.pop("tool", None)
176
+ self.nonce = kwargs.pop("nonce", None)
177
+ self.trader_address = kwargs.pop("sender", None)
178
+
179
+
180
+ @dataclass(init=False)
181
+ class PredictionResponse:
182
+ """A response of a prediction."""
183
+
184
+ p_yes: float
185
+ p_no: float
186
+ confidence: float
187
+ info_utility: float
188
+ vote: Optional[str]
189
+ win_probability: Optional[float]
190
+
191
+ def __init__(self, **kwargs: Any) -> None:
192
+ """Initialize the mech's prediction ignoring extra keys."""
193
+ try:
194
+ self.p_yes = float(kwargs.pop("p_yes"))
195
+ self.p_no = float(kwargs.pop("p_no"))
196
+ self.confidence = float(kwargs.pop("confidence"))
197
+ self.info_utility = float(kwargs.pop("info_utility"))
198
+ self.win_probability = 0
199
+
200
+ # Validate probabilities
201
+ probabilities = {
202
+ "p_yes": self.p_yes,
203
+ "p_no": self.p_no,
204
+ "confidence": self.confidence,
205
+ "info_utility": self.info_utility,
206
+ }
207
+
208
+ for name, prob in probabilities.items():
209
+ if not 0 <= prob <= 1:
210
+ raise ValueError(f"{name} probability is out of bounds: {prob}")
211
+
212
+ if self.p_yes + self.p_no != 1:
213
+ raise ValueError(
214
+ f"Sum of p_yes and p_no is not 1: {self.p_yes} + {self.p_no}"
215
+ )
216
+
217
+ self.vote = self.get_vote()
218
+ self.win_probability = self.get_win_probability()
219
+
220
+ except KeyError as e:
221
+ raise KeyError(f"Missing key in PredictionResponse: {e}")
222
+ except ValueError as e:
223
+ raise ValueError(f"Invalid value in PredictionResponse: {e}")
224
+
225
+ def get_vote(self) -> Optional[str]:
226
+ """Return the vote."""
227
+ if self.p_no == self.p_yes:
228
+ return None
229
+ if self.p_no > self.p_yes:
230
+ return "No"
231
+ return "Yes"
232
+
233
+ def get_win_probability(self) -> Optional[float]:
234
+ """Return the probability estimation for winning with vote."""
235
+ return max(self.p_no, self.p_yes)
236
+
237
+
238
+ @dataclass(init=False)
239
+ class MechResponse:
240
+ """A structure for the response of a mech."""
241
+
242
+ request_id: int
243
+ deliver_block: Optional[int]
244
+ result: Optional[PredictionResponse]
245
+ error: Optional[str]
246
+ error_message: Optional[str]
247
+ prompt_response: Optional[str]
248
+ mech_address: Optional[str]
249
+
250
+ def __init__(self, **kwargs: Any) -> None:
251
+ """Initialize the mech's response ignoring extra keys."""
252
+ self.error = kwargs.get("error", None)
253
+ self.request_id = int(kwargs.get(REQUEST_ID, 0))
254
+ self.deliver_block = int(kwargs.get(BLOCK_FIELD, 0))
255
+ self.result = kwargs.get("result", None)
256
+ self.prompt_response = kwargs.get(PROMPT_FIELD, None)
257
+ self.mech_address = kwargs.get("sender", None)
258
+
259
+ if self.result != "Invalid response":
260
+ self.error_message = kwargs.get("error_message", None)
261
+
262
+ try:
263
+ if isinstance(self.result, str):
264
+ kwargs = json.loads(self.result)
265
+ self.result = PredictionResponse(**kwargs)
266
+ self.error = str(False)
267
+
268
+ except JSONDecodeError:
269
+ self.error_message = "Response parsing error"
270
+ self.error = str(True)
271
+
272
+ except Exception as e:
273
+ self.error_message = str(e)
274
+ self.error = str(True)
275
+
276
+ else:
277
+ self.error_message = "Invalid response from tool"
278
+ self.error = str(True)
279
+ self.result = None
280
+
281
+
282
+ EVENT_TO_MECH_STRUCT = {
283
+ MechEventName.REQUEST: MechRequest,
284
+ MechEventName.DELIVER: MechResponse,
285
+ }
286
+
287
+
288
+ def parse_args() -> str:
289
+ """Parse the arguments and return the RPC."""
290
+ if len(sys.argv) != 2:
291
+ raise ValueError("Expected the RPC as a positional argument.")
292
+ return sys.argv[1]
293
+
294
+
295
+ def read_abi(abi_path: str) -> str:
296
+ """Read and return the wxDAI contract's ABI."""
297
+ with open(abi_path) as abi_file:
298
+ return abi_file.read()
299
+
300
+
301
+ def reduce_window(contract_instance, event, from_block, batch_size, latest_block):
302
+ """Dynamically reduce the batch size window."""
303
+ keep_fraction = 1 - REDUCE_FACTOR
304
+ events_filter = contract_instance.events[event].build_filter()
305
+ events_filter.fromBlock = from_block
306
+ batch_size = int(batch_size * keep_fraction)
307
+ events_filter.toBlock = min(from_block + batch_size, latest_block)
308
+ tqdm.write(f"RPC timed out! Resizing batch size to {batch_size}.")
309
+ time.sleep(SLEEP)
310
+ return events_filter, batch_size
311
+
312
+
313
+ def get_events(
314
+ w3: Web3,
315
+ event: str,
316
+ mech_address: ChecksumAddress,
317
+ mech_abi_path: str,
318
+ earliest_block: int,
319
+ latest_block: int,
320
+ ) -> List:
321
+ """Get the delivered events."""
322
+ abi = read_abi(mech_abi_path)
323
+ contract_instance = w3.eth.contract(address=mech_address, abi=abi)
324
+
325
+ events = []
326
+ from_block = earliest_block
327
+ batch_size = BLOCKS_CHUNK_SIZE
328
+ with tqdm(
329
+ total=latest_block - from_block,
330
+ desc=f"Searching {event} events for mech {mech_address}",
331
+ unit="blocks",
332
+ ) as pbar:
333
+ while from_block < latest_block:
334
+ events_filter = contract_instance.events[event].build_filter()
335
+ events_filter.fromBlock = from_block
336
+ events_filter.toBlock = min(from_block + batch_size, latest_block)
337
+
338
+ entries = None
339
+ retries = 0
340
+ while entries is None:
341
+ try:
342
+ entries = events_filter.deploy(w3).get_all_entries()
343
+ retries = 0
344
+ except (RequestsHTTPError, Urllib3HTTPError) as exc:
345
+ if "Request Entity Too Large" in exc.args[0]:
346
+ events_filter, batch_size = reduce_window(
347
+ contract_instance,
348
+ event,
349
+ from_block,
350
+ batch_size,
351
+ latest_block,
352
+ )
353
+ except (Urllib3ReadTimeoutError, RequestsReadTimeoutError):
354
+ events_filter, batch_size = reduce_window(
355
+ contract_instance, event, from_block, batch_size, latest_block
356
+ )
357
+ except Exception as exc:
358
+ retries += 1
359
+ if retries == N_RPC_RETRIES:
360
+ tqdm.write(
361
+ f"Skipping events for blocks {events_filter.fromBlock} - {events_filter.toBlock} "
362
+ f"as the retries have been exceeded."
363
+ )
364
+ break
365
+ sleep = SLEEP * retries
366
+ if (
367
+ (
368
+ isinstance(exc, ValueError)
369
+ and re.match(
370
+ RE_RPC_FILTER_ERROR, exc.args[0].get("message", "")
371
+ )
372
+ is None
373
+ )
374
+ and not isinstance(exc, ValueError)
375
+ and not isinstance(exc, MismatchedABI)
376
+ ):
377
+ tqdm.write(
378
+ f"An error was raised from the RPC: {exc}\n Retrying in {sleep} seconds."
379
+ )
380
+ time.sleep(sleep)
381
+
382
+ from_block += batch_size
383
+ pbar.update(batch_size)
384
+
385
+ if entries is None:
386
+ continue
387
+
388
+ chunk = list(entries)
389
+ events.extend(chunk)
390
+ time.sleep(RPC_POLL_INTERVAL)
391
+
392
+ return events
393
+
394
+
395
+ def parse_events(raw_events: List) -> List[MechEvent]:
396
+ """Parse all the specified MechEvents."""
397
+ parsed_events = []
398
+ for event in raw_events:
399
+ for_block = event.get("blockNumber", 0)
400
+ args = event.get(EVENT_ARGUMENTS, {})
401
+ request_id = args.get(REQUEST_ID, 0)
402
+ data = args.get(DATA, b"")
403
+ sender = args.get(REQUEST_SENDER, "")
404
+ parsed_event = MechEvent(for_block, request_id, data, sender)
405
+ parsed_events.append(parsed_event)
406
+
407
+ return parsed_events
408
+
409
+
410
+ def create_session() -> requests.Session:
411
+ """Create a session with a retry strategy."""
412
+ session = requests.Session()
413
+ retry_strategy = Retry(
414
+ total=N_IPFS_RETRIES + 1,
415
+ backoff_factor=BACKOFF_FACTOR,
416
+ status_forcelist=STATUS_FORCELIST,
417
+ )
418
+ adapter = HTTPAdapter(max_retries=retry_strategy)
419
+ for protocol in (HTTP, HTTPS):
420
+ session.mount(protocol, adapter)
421
+
422
+ return session
423
+
424
+
425
+ def request(
+     session: requests.Session, url: str, timeout: int = HTTP_TIMEOUT
+ ) -> Optional[requests.Response]:
+     """Perform a request with a session."""
+     try:
+         response = session.get(url, timeout=timeout)
+         response.raise_for_status()
+     except requests.exceptions.HTTPError as exc:
+         tqdm.write(f"HTTP error occurred: {exc}.")
+     except Exception as exc:
+         tqdm.write(f"Unexpected error occurred: {exc}.")
+     else:
+         return response
+     return None
+
+
+ def limit_text(text: str, limit: int = 200) -> str:
+     """Limit the given text."""
+     if len(text) > limit:
+         return f"{text[:limit]}..."
+     return text
+
+
+ def parse_ipfs_response(
+     session: requests.Session,
+     url: str,
+     event: MechEvent,
+     event_name: MechEventName,
+     response: requests.Response,
+ ) -> Optional[Dict[str, str]]:
+     """Parse a response from IPFS."""
+     try:
+         return response.json()
+     except requests.exceptions.JSONDecodeError:
+         # this is a workaround because the `metadata.json` file was introduced and removed multiple times
+         if event_name == MechEventName.REQUEST and url != event.ipfs_request_link:
+             url = event.ipfs_request_link
+             response = request(session, url)
+             if response is None:
+                 tqdm.write(f"Skipping {event=}.")
+                 return None
+
+             try:
+                 return response.json()
+             except requests.exceptions.JSONDecodeError:
+                 pass
+
+     tqdm.write(f"Failed to parse response into json for {url=}.")
+     return None
+
+
+ def parse_ipfs_tools_content(
+     raw_content: Dict[str, str], event: MechEvent, event_name: MechEventName
+ ) -> Optional[Union[MechRequest, MechResponse]]:
+     """Parse tools content from IPFS."""
+     struct = EVENT_TO_MECH_STRUCT.get(event_name)
+     raw_content[REQUEST_ID] = str(event.requestId)
+     raw_content[BLOCK_FIELD] = str(event.for_block)
+     raw_content["sender"] = str(event.sender)
+
+     try:
+         mech_response = struct(**raw_content)
+     except (ValueError, TypeError, KeyError):
+         tqdm.write(f"Could not parse {limit_text(str(raw_content))}")
+         return None
+
+     if event_name == MechEventName.REQUEST and mech_response.tool in IRRELEVANT_TOOLS:
+         return None
+
+     return mech_response
+
+
+ def get_contents(
+     session: requests.Session, events: List[MechEvent], event_name: MechEventName
+ ) -> pd.DataFrame:
+     """Fetch the tools' responses."""
+     contents = []
+     for event in tqdm(events, desc="Tools' results", unit="results"):
+         url = event.ipfs_link(event_name)
+         response = request(session, url)
+         if response is None:
+             tqdm.write(f"Skipping {event=}.")
+             continue
+
+         raw_content = parse_ipfs_response(session, url, event, event_name, response)
+         if raw_content is None:
+             continue
+
+         mech_response = parse_ipfs_tools_content(raw_content, event, event_name)
+         if mech_response is None:
+             continue
+         contents.append(mech_response)
+         time.sleep(IPFS_POLL_INTERVAL)
+
+     return pd.DataFrame(contents)
+
+
+ def check_for_dicts(df: pd.DataFrame) -> List[str]:
+     """Check for columns that contain dictionaries."""
+     dict_columns = []
+     for column in df.columns:
+         if df[column].apply(lambda x: isinstance(x, dict)).any():
+             dict_columns.append(column)
+     return dict_columns
+
+
+ def drop_dict_rows(df: pd.DataFrame, dict_columns: List[str]) -> pd.DataFrame:
+     """Drop rows that contain dictionaries."""
+     for column in dict_columns:
+         df = df[~df[column].apply(lambda x: isinstance(x, dict))]
+     return df
+
+
+ def clean(df: pd.DataFrame) -> pd.DataFrame:
+     """Clean the dataframe."""
+     dict_columns = check_for_dicts(df)
+     df = drop_dict_rows(df, dict_columns)
+     cleaned = df.drop_duplicates()
+     cleaned[REQUEST_ID_FIELD] = cleaned[REQUEST_ID_FIELD].astype("str")
+     return cleaned
+
+
+ def transform_request(contents: pd.DataFrame) -> pd.DataFrame:
+     """Transform the requests dataframe."""
+     return clean(contents)
+
+
+ def transform_deliver(contents: pd.DataFrame, full_contents=False) -> pd.DataFrame:
+     """Transform the delivers dataframe."""
+     unpacked_result = pd.json_normalize(contents.result)
+     # drop the result column if it exists
+     if "result" in unpacked_result.columns:
+         unpacked_result.drop(columns=["result"], inplace=True)
+
+     # drop the prompt column if it exists
+     if "prompt" in unpacked_result.columns:
+         unpacked_result.drop(columns=["prompt"], inplace=True)
+
+     contents = pd.concat((contents, unpacked_result), axis=1)
+
+     if "result" in contents.columns:
+         contents.drop(columns=["result"], inplace=True)
+
+     if "prompt" in contents.columns:
+         contents.drop(columns=["prompt"], inplace=True)
+
+     return clean(contents)
+
+
+ def gen_event_filename(event_name: MechEventName) -> str:
+     """Generate the filename of an event."""
+     return f"{event_name.value.lower()}s.csv"
+
+
+ def read_n_last_lines(filename: str, n: int = 1) -> str:
+     """Return the `n` last lines' content of a file."""
+     num_newlines = 0
+     with open(filename, "rb") as f:
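+         # Scan backwards from the end of the file two bytes at a time, counting newlines
+         # until `n` lines have been passed; files shorter than that fall back to the start.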
+         try:
+             f.seek(-2, os.SEEK_END)
+             while num_newlines < n:
+                 f.seek(-2, os.SEEK_CUR)
+                 if f.read(1) == b"\n":
+                     num_newlines += 1
+         except OSError:
+             f.seek(0)
+         last_line = f.readline().decode()
+     return last_line
+
+
+ def get_earliest_block(event_name: MechEventName) -> int:
+     """Get the earliest block number to use when filtering for events."""
+     filename = gen_event_filename(event_name)
+     if not os.path.exists(filename):
+         return 0
+
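+     # Resume from the last block stored in the existing CSV, reading only the header and
+     # the final data row instead of loading the whole file.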
+     cols = pd.read_csv(filename, index_col=0, nrows=0).columns.tolist()
+     last_line_buff = StringIO(read_n_last_lines(filename))
+     last_line_series = pd.read_csv(last_line_buff, names=cols)
+     block_field = f"{event_name.value.lower()}_{BLOCK_FIELD}"
+     return int(last_line_series[block_field].values[0])
+
+
+ def store_progress(
+     filename: str,
+     event_to_contents: Dict[MechEventName, pd.DataFrame],
+     tools: pd.DataFrame,
+ ) -> None:
+     """Store the given progress."""
+     if filename:
+         for event_name, content in event_to_contents.items():
+             event_filename = gen_event_filename(event_name)
+
+             if "result" in content.columns:
+                 content.drop(columns=["result"], inplace=True)
+
+             content.to_csv(event_filename, index=False, escapechar="\\")
+
+         # drop the result column before storing the merged tools file
+         if "result" in tools.columns:
+             tools.drop(columns=["result"], inplace=True)
+
+         tools.to_csv(filename, index=False, escapechar="\\")
+
+
+ def etl(
+     rpcs: List[str], filename: Optional[str] = None, full_contents: bool = True
+ ) -> pd.DataFrame:
+     """Fetch from on-chain events, process, store and return the tools' results on all the questions as a Dataframe."""
+     w3s = [Web3(HTTPProvider(r)) for r in rpcs]
+     session = create_session()
+     event_to_transformer = {
+         MechEventName.REQUEST: transform_request,
+         MechEventName.DELIVER: transform_deliver,
+     }
+     mech_to_info = {
+         to_checksum_address(address): (
+             os.path.join(CONTRACTS_PATH, filename),
+             earliest_block,
+         )
+         for address, (filename, earliest_block) in MECH_TO_INFO.items()
+     }
+     event_to_contents = {}
+
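+     # Resolve the upper block bound once so every mech and event type scans the same range.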
+     latest_block = LATEST_BLOCK
+     if latest_block is None:
+         latest_block = w3s[0].eth.get_block(LATEST_BLOCK_NAME)[BLOCK_DATA_NUMBER]
+
+     next_start_block = None
+
+     # Loop through events in event_to_transformer
+     for event_name, transformer in event_to_transformer.items():
+         if next_start_block is None:
+             next_start_block_base = get_earliest_block(event_name)
+
+         # Loop through mech addresses in mech_to_info
+         events = []
+         for address, (abi, earliest_block) in mech_to_info.items():
+             if next_start_block_base == 0:
+                 next_start_block = earliest_block
+             else:
+                 next_start_block = next_start_block_base
+
+             print(
+                 f"Searching for {event_name.value} events for mech {address} from block {next_start_block} to {latest_block}."
+             )
+
+             # parallelize the fetching of events
+             with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
+                 futures = []
+                 for i in range(
+                     next_start_block, latest_block, BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE
+                 ):
+                     futures.append(
+                         executor.submit(
+                             get_events,
+                             random.choice(w3s),
+                             event_name.value,
+                             address,
+                             abi,
+                             i,
+                             min(i + BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE, latest_block),
+                         )
+                     )
+
+                 for future in tqdm(
+                     as_completed(futures),
+                     total=len(futures),
+                     desc=f"Fetching {event_name.value} Events",
+                 ):
+                     current_mech_events = future.result()
+                     events.extend(current_mech_events)
+
+         parsed = parse_events(events)
+
+         contents = []
+         with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
+             futures = []
+             for i in range(0, len(parsed), GET_CONTENTS_BATCH_SIZE):
+                 futures.append(
+                     executor.submit(
+                         get_contents,
+                         session,
+                         parsed[i : i + GET_CONTENTS_BATCH_SIZE],
+                         event_name,
+                     )
+                 )
+
+             for future in tqdm(
+                 as_completed(futures),
+                 total=len(futures),
+                 desc=f"Fetching {event_name.value} Contents",
+             ):
+                 current_mech_contents = future.result()
+                 contents.append(current_mech_contents)
+
+         contents = pd.concat(contents, ignore_index=True)
+
+         full_contents = True
+         if event_name == MechEventName.REQUEST:
+             transformed = transformer(contents)
+         elif event_name == MechEventName.DELIVER:
+             transformed = transformer(contents, full_contents=full_contents)
+
+         events_filename = gen_event_filename(event_name)
+
+         if os.path.exists(events_filename):
+             old = pd.read_csv(events_filename)
+
+             # Reset index to avoid index conflicts
+             old.reset_index(drop=True, inplace=True)
+             transformed.reset_index(drop=True, inplace=True)
+
+             # Concatenate DataFrames
+             transformed = pd.concat([old, transformed], ignore_index=True)
+
+             # Drop duplicates if necessary
+             transformed.drop_duplicates(subset=REQUEST_ID_FIELD, inplace=True)
+
+         event_to_contents[event_name] = transformed.copy()
+
+     # Store progress
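+     # Join the Request and Deliver frames on the request id to build the tools dataframe.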
+     tools = pd.merge(*event_to_contents.values(), on=REQUEST_ID_FIELD)
+     store_progress(filename, event_to_contents, tools)
+
+     return tools
+
+
+ if __name__ == "__main__":
+     RPCs = [
+         "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a",
+     ]
+
+     tools = etl(rpcs=RPCs, filename=DEFAULT_FILENAME, full_contents=True)
scripts/weekly_analysis.py ADDED
@@ -0,0 +1,119 @@
+ import logging
+ import re
+ import os
+ from datetime import datetime
+ from concurrent.futures import ThreadPoolExecutor
+ from tqdm import tqdm
+ from web3 import Web3
+ from typing import Optional
+ import pandas as pd
+ from functools import partial
+ from markets import (
+     etl as mkt_etl,
+     DEFAULT_FILENAME as MARKETS_FILENAME,
+ )
+ from tools import (
+     etl as tools_etl,
+     DEFAULT_FILENAME as TOOLS_FILENAME,
+ )
+ from profitability import run_profitability_analysis
+
+
+ logging.basicConfig(level=logging.INFO)
+
+
+ def get_question(text: str) -> Optional[str]:
+     """Get the question from a text."""
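+     # For example, a prompt containing ... "Will <event> happen by <date>?" ... yields the
+     # double-quoted question; a prompt without quoted text yields None.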
+     # Regex to find text within double quotes
+     pattern = r'"([^"]*)"'
+
+     # Find all occurrences
+     questions = re.findall(pattern, text)
+
+     # Use the first question if there are multiple
+     question = questions[0] if questions else None
+
+     return question
+
+
+ def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:
+     """Get the current answer for a question."""
+     row = fpmms[fpmms['title'] == text]
+     if row.shape[0] == 0:
+         return None
+     return row['currentAnswer'].values[0]
+
+
+ def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
+     """Convert a block number to a timestamp."""
+     block = web3.eth.get_block(block_number)
+     timestamp = datetime.utcfromtimestamp(block['timestamp'])
+     return timestamp.strftime('%Y-%m-%d %H:%M:%S')
+
+
+ def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:
+     """Parallelize the timestamp conversion."""
+     block_numbers = df['request_block'].tolist()
+     with ThreadPoolExecutor(max_workers=10) as executor:
+         results = list(tqdm(executor.map(function, block_numbers), total=len(block_numbers)))
+     return results
+
+
+ def weekly_analysis():
+     """Run weekly analysis for the FPMMS project."""
+     rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
+     web3 = Web3(Web3.HTTPProvider(rpc))
+
+     # Run markets ETL
+     logging.info("Running markets ETL")
+     mkt_etl(MARKETS_FILENAME)
+     logging.info("Markets ETL completed")
+
+     # Run tools ETL
+     logging.info("Running tools ETL")
+     tools_etl(
+         rpcs=[rpc],
+         filename=TOOLS_FILENAME,
+         full_contents=True,
+     )
+     logging.info("Tools ETL completed")
+
+     # Run profitability analysis
+     logging.info("Running profitability analysis")
+     if os.path.exists("fpmmTrades.csv"):
+         os.remove("fpmmTrades.csv")
+     run_profitability_analysis(
+         rpc=rpc,
+     )
+     logging.info("Profitability analysis completed")
+
+     # Get currentAnswer from FPMMS
+     fpmms = pd.read_csv(MARKETS_FILENAME)
+     tools = pd.read_csv(TOOLS_FILENAME)
+
+     # Get the question from the tools
+     logging.info("Getting the question and current answer for the tools")
+     tools['title'] = tools['prompt_request'].apply(lambda x: get_question(x))
+     tools['currentAnswer'] = tools['title'].apply(lambda x: current_answer(x, fpmms))
+
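+     # Normalise the casing of the market answers: 'yes' -> 'Yes', 'no' -> 'No'.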
+     tools['currentAnswer'] = tools['currentAnswer'].str.replace('yes', 'Yes')
+     tools['currentAnswer'] = tools['currentAnswer'].str.replace('no', 'No')
+
+     # Convert block number to timestamp
+     logging.info("Converting block number to timestamp")
+
+     partial_block_number_to_timestamp = partial(block_number_to_timestamp, web3=web3)
+     missing_timestamps = parallelize_timestamp_conversion(tools, partial_block_number_to_timestamp)
+     tools['request_time'] = missing_timestamps
+     tools['request_month_year'] = pd.to_datetime(tools['request_time']).dt.strftime('%Y-%m')
+     tools['request_month_year_week'] = pd.to_datetime(tools['request_time']).dt.to_period('W').astype(str)
+
+     # Save the tools
+     tools.to_csv(TOOLS_FILENAME, index=False)
+
+     logging.info("Weekly analysis files generated and saved")
+
+
+ if __name__ == "__main__":
+     weekly_analysis()
+