victormiller committed on
Commit
44f2e3d
1 Parent(s): 752d87b

Update common.py

Browse files
Files changed (1) hide show
  1. common.py +23 -1
common.py CHANGED
@@ -46,6 +46,27 @@ r"[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:(?:[A
46
  ip_address_code = """
47
  r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
48
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  global_div = Div(
50
  Section(
51
  H2("Global Steps"),
@@ -100,12 +121,13 @@ global_div = Div(
100
  H2("PII Removal"),
101
  H3("Motivation Behind PII Removal"),
102
  P("PII refers to any information that can be used to identify an individual, such as names, addresses, phone numbers, email addresses, and social security numbers. PII removal is essential for data privacy and security, as well as for compliance with global regulations. By removing PII from the training data, we can reduce the risk of data breaches and unauthorized access to sensitive information. Additionally, models can also generate PII during inference time."),
 
103
  ),
104
  Section(
105
  H3("Removing PII"),
106
  P("We have removed two types of PII from the dataset: email address and IP address. Regular expressions are used to identify and replace these PII with a generic placeholder. Below is an example of how we removed email addresses from the dataset:"),
107
  P("We have used the following regular expressions to identify and replace PII:"),
108
- Ul(Li("Email:"), Li(email_code, style="list-style-type: none"), Li("IP Address: NEED TO UPDATE"), Li(ip_address_code, style="list-style-type: none")),
109
  ),
110
  Section(
111
  H2("Normalization Form C (NFC)"),
 
46
  ip_address_code = """
47
  r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
48
  """
49
+
50
+ pii_table = pd.DataFrame(
51
+ {
52
+ "PII Type": [
53
+ "Email",
54
+ "IP Address",
55
+ ],
56
+ "Examples": [
57
+ "john.doe@llm360.ai",
58
+ "172.217.164.110",
59
+ ],
60
+ "Target": [
61
+ "firstname.lastname@example.com",
62
+ '["22.214.171.124" , ...]',
63
+ ],
64
+ }
65
+ )
66
+
67
+ table_html_pii = pii_table.to_html(index=False, border=0)
68
+ table_div_pii = Div(NotStr(table_html_pii), style="margin: 40px;")
69
+
70
  global_div = Div(
71
  Section(
72
  H2("Global Steps"),
 
121
  H2("PII Removal"),
122
  H3("Motivation Behind PII Removal"),
123
  P("PII refers to any information that can be used to identify an individual, such as names, addresses, phone numbers, email addresses, and social security numbers. PII removal is essential for data privacy and security, as well as for compliance with global regulations. By removing PII from the training data, we can reduce the risk of data breaches and unauthorized access to sensitive information. Additionally, models can also generate PII during inference time."),
124
+ table_div_pii,
125
  ),
126
  Section(
127
  H3("Removing PII"),
128
  P("We have removed two types of PII from the dataset: email address and IP address. Regular expressions are used to identify and replace these PII with a generic placeholder. Below is an example of how we removed email addresses from the dataset:"),
129
  P("We have used the following regular expressions to identify and replace PII:"),
130
+ Ul(Li("Email:"), Li(email_code, style="list-style-type: none"), Li("IP Address:"), Li(ip_address_code, style="list-style-type: none")),
131
  ),
132
  Section(
133
  H2("Normalization Form C (NFC)"),