Spaces:

Emil25
/

mlops

Sleeping

App Files Files Community

Emil25 committed on Apr 21

Commit

61f924c

•

1 Parent(s): 013cf90

Upload 4 files

Browse files

Files changed (4) hide show

Dockerfile +23 -0
main.py +151 -0
scripts/download_data.py +17 -0
scripts/model_training.py +56 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+FROM python:3.10
+USER root
+WORKDIR /app
+# Copy the directory itself, not just its contents: main.py imports
+# `scripts.model_training`, so the files must live under /app/scripts/.
+COPY scripts /app/scripts/
+COPY main.py /app/
+COPY requirements.txt /app/
+RUN apt-get update && \
+    apt-get install -y python3-pip python3-venv
+# Install the dependencies into a dedicated virtual environment and put it
+# first on PATH so `pip` and `streamlit` resolve to it.
+RUN python3 -m venv /app/venv
+ENV PATH="/app/venv/bin:$PATH"
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir --upgrade -r /app/requirements.txt
+# Exec form runs streamlit directly instead of through `/bin/sh -c`, so it
+# receives container stop signals.
+CMD ["streamlit", "run", "main.py"]

main.py ADDED Viewed

	@@ -0,0 +1,151 @@

+from scripts.model_training import model_training
+import pandas as pd
+import streamlit as st
+st.set_page_config(
+    page_title="Cardiovascular-Disease App",
+    page_icon="🧊",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+def user_input_features():
+    age = st.sidebar.slider('Возраст',
+                            min_value=10,
+                            max_value=100,
+                            step=1,
+    )
+    gender= st.sidebar.selectbox('Пол',
+                                options=('Мужской', 'Женский'),
+    )
+    height = st.sidebar.slider('Рост (см)',
+                            min_value=100,
+                            max_value=200,
+                            value=150,
+                            step=1,
+    )
+    weight = st.sidebar.slider('Вес (кг)',
+                            min_value=30,
+                            max_value=200,
+                            value=70,
+                            step=1,
+    )
+    ap_hi = st.sidebar.slider('Систолическое артериальное давление',
+                            min_value=50,
+                            max_value=200,
+                            value=120,
+                            step=1,
+    )
+    ap_lo = st.sidebar.slider('Диастолическое артериальное давление',
+                            min_value=50,
+                            max_value=200,
+                            value=80,
+                            step=1,
+    )
+    cholesterol = st.sidebar.selectbox('Общий холестерин (ммоль/л.)',
+                                options=('<5','5-63', '>6.3'),
+    )
+    gluc = st.sidebar.selectbox('Глюкоза (ммоль/л.)',
+                                options=('3.5—5.5','5.6-9', '>9'),
+    )
+    smoke = st.sidebar.selectbox('Курение',
+                                options=('Да','Нет'),
+    )
+    alco = st.sidebar.selectbox('Употребление алкоголя',
+                                options=('Да','Нет'),
+    )
+    active = st.sidebar.selectbox('Физическая активность',
+                                options=('Да','Нет'),
+    )
+    def map_gluc(gluc):
+        if gluc == '3.5—5.5':
+            return '1'
+        elif gluc == '5.6-9':
+            return '2'
+        else:
+            return '3'
+    def map_cholesterol(cholesterol):
+        if cholesterol == '<5':
+            return '1'
+        elif cholesterol == '5-63':
+            return '2'
+        else:
+            return '3'
+    age = age * 365
+    data = {'age': age,
+            'gender': '1' if gender == 'Женский' else '0',
+            'height': height,
+            'weight': weight,
+            'ap_hi': ap_hi,
+            'ap_lo': ap_lo,
+            'cholesterol': map_cholesterol(cholesterol),
+            'gluc': map_gluc(gluc),
+            'smoke': '1' if smoke == 'Да' else '0',
+            'alco':  '1' if alco == 'Да' else '0',
+            'active': '1' if active == 'Да' else '0',
+    }
+    features = pd.DataFrame(data, index=[0])
+    return features
+@st.cache_data()
+def get_model():
+    model, metric = model_training()
+    model_json = {'model': model,
+                  'metric': metric}
+    return model_json
+def main():
+    st.write(""" # Приложение для определения наличия сердечно-сосудистого заболевания (ССЗ) :heartpulse: """)
+    st.sidebar.header("Параметры ввода")
+    st.divider()
+    user_data = user_input_features()
+    st.write(" # Ваши данные")
+    new_column_names = {'age': 'Возраст (дней)',
+                        'gender': 'Пол',
+                        'height': 'Рост (см)' ,
+                        'weight': 'Вес (кг)',
+                        'ap_hi': 'Систолическое давление',
+                        'ap_lo': 'Диастолическое давление',
+                        'cholesterol': 'Общий холестерин',
+                        'gluc': 'Глюкоза',
+                        'smoke': 'Курение',
+                        'alco':  'Алкоголь',
+                        'active': 'Физическая активность',
+                        'cardio': 'x',
+        }
+    user_data_rus = user_data.rename(columns=new_column_names)
+    st.dataframe(user_data_rus)
+    with st.spinner('Загрузка модели ...'):
+        model = get_model()
+        st.success('Модель загружена!')
+    st.divider()
+    diag_btn = st.button("Диагностика", type="primary")
+    if diag_btn == True:
+        result = ' '.join(map(str, model['model'].predict(user_data)))
+        result = "Положительный" if "result" == "positive" else "Отрицательный"
+        metric = model['metric']
+        col1, col2 = st.columns(2)
+        col1.metric(label=" # :heartpulse: Результат",
+                    value=result,
+        )
+        col2.metric(label=" # Метрика",
+                    value=str(metric),
+        )
+# Entry point: build the page when this file is executed as a script.
+if __name__ == "__main__":
+    main()

scripts/download_data.py ADDED Viewed

	@@ -0,0 +1,17 @@

+# Import the required libraries
+from sklearn.datasets import fetch_openml
+from sklearn.model_selection import train_test_split
+def download_data():
+    # Загрузка датасета diabetes с помощью fetch_openml
+    cardio_data = fetch_openml("Cardiovascular-Disease-dataset", version=1, parser="auto")
+    cardio_data_df = cardio_data.frame
+    cardio_data_df['cardio'] = cardio_data_df['cardio'].apply(lambda x: 'positive' if x=='1' else 'negative')
+    # Разделение данных на обучающую и тестовую выборки
+    train_set, test_set = train_test_split(cardio_data_df, test_size=0.1, random_state=42)
+    return train_set, test_set

scripts/model_training.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from sklearn.ensemble import RandomForestClassifier
+from sklearn.pipeline import Pipeline
+from scripts.download_data import download_data
+from sklearn.metrics import f1_score
+from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import PowerTransformer
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.pipeline import Pipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import QuantileTransformer
+import pandas as pd
+def calculate_metric(model):
+    _, test_set = download_data()
+    X_test, y_test = test_set.drop(columns=['cardio']), test_set['cardio']
+    y_pred = model.predict(X_test)
+    f1 = f1_score(y_test, y_pred, pos_label='positive')
+    return f1
+def model_training():
+    train_set, _ = download_data()
+    X_train, y_train = train_set.drop(columns=['cardio']), train_set['cardio']
+    num_columns = ['age', 'height', 'weight', 'ap_hi', 'ap_lo',]
+    cat_columns = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
+    num_pipe = Pipeline([
+        ('qt', QuantileTransformer(output_distribution="normal")),
+        ('scaler', StandardScaler()),
+        ('power', PowerTransformer()),
+    ])
+    cat_pipe = Pipeline([
+        ('encoder', OneHotEncoder(handle_unknown='ignore'))
+    ])
+    preprocessors_all = ColumnTransformer(transformers=[
+        ('num_p', num_pipe, num_columns),
+        ('cat_p', cat_pipe, cat_columns),
+    ])
+    pipe_all = Pipeline([
+                ('preprocessors', preprocessors_all),
+                ('model', RandomForestClassifier(n_estimators=200,
+                                      criterion = "gini",
+                                      min_samples_split=15,
+                                      max_depth=15,
+                                      oob_score=True)
+                )
+    ])
+    pipe_all.fit(X_train, y_train)
+    return pipe_all, calculate_metric(pipe_all)