| # Script to attempt to clean up our owncloud database (b/32) after The Postgres |
| # Fuckup (b/30). |
| # |
| # Think of it as a one-shot fsck, documented in the form of the code that q3k@ |
| # used to recover from this kerfuffle. |
| # |
| # SECURITY: It's full of manual SQL query crafting without parametrization. |
| # Don't attempt to use it for anything else other than this one-shot usecase. |
| # |
| # You will need to tunnel to the postgreses running on Boston: |
| # $ ssh \ |
| # -L15432:127.0.0.1:5432 \ |
| # -L15433:127.0.0.1:5433 \ |
| # hackerspace.pl |
| |
| from datetime import datetime |
| import os |
| |
| import psycopg2 |
| |
| |
| incident_start = 1611529200 # when pg12 started to run |
| incident_end = 1611788400 # when we rolled back to pg9 |
| |
| |
| OWNCLOUD_PASSWORD = os.environ.get("OWNCLOUD_PASSWORD").strip() |
| if not OWNCLOUD_PASSWORD: |
| # Get it from boston, /var/www/owncloud/config/config.php. |
| raise Exception("OWNCLOUD_PASSWORD must be set to owncloud postgres password") |
| |
| |
| conn9 = psycopg2.connect(host="localhost", port=15432, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud") |
| conn12 = psycopg2.connect(host="localhost", port=15433, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud") |
| |
| |
| def idset(conn, table, keyname="id"): |
| """Return a set of IDs from a given table, one per row.""" |
| cur = conn.cursor() |
| cur.execute(f"SELECT {keyname} FROM oc_{table}") |
| res = cur.fetchall() |
| cur.close() |
| return set([r[0] for r in res]) |
| |
| |
| def valset(conn, table, keys): |
| """Return a set of concatenated values for the given keys in a table, one per row.""" |
| keynames = ", ".join(keys) |
| cur = conn.cursor() |
| cur.execute(f"SELECT {keynames} FROM oc_{table}") |
| res = cur.fetchall() |
| cur.close() |
| res = [';;;'.join([str(elem) for elem in r]) for r in res] |
| return set(res) |
| |
| |
| # Check accounts difference. |
| # |
| # RESULT: Thankfully, no accounts have been accidentally roled back. |
| accounts12 = idset(conn12, "accounts", keyname="uid") |
| accounts9 = idset(conn9, "accounts", keyname="uid") |
| print("Accounts missing in 9:", accounts12 - accounts9) |
| assert (accounts12 - accounts9) == set() |
| |
| |
| def account_by_uid(conn, uid): |
| """Return SSO UID for a given Owncloud UID.""" |
| cur = conn.cursor() |
| cur.execute(f"SELECT ldap_dn FROM oc_ldap_user_mapping WHERE owncloud_name = '{uid}'") |
| dn, = cur.fetchone() |
| cur.close() |
| part = dn.split(',')[0] |
| assert part.startswith('uid=') |
| return part[4:] |
| |
| |
| def storage_owner_by_id(conn, id_): |
| """Return SSO UID for a given storage numerical ID.""" |
| cur = conn.cursor() |
| cur.execute(f"SELECT id FROM oc_storages WHERE numeric_id = '{id_}'") |
| oid, = cur.fetchone() |
| cur.close() |
| if oid == 'object::store:amazon::nextcloud': |
| return "S3" |
| assert oid.startswith('object::user:') |
| userid = oid[13:] |
| assert len(userid) > 0 |
| if userid == "gallery": |
| return "GALLERY" |
| return account_by_uid(conn, userid) |
| |
| |
| # Check shares table. This table contains the intent of sharing some file with someone else. |
| # |
| # RESULT: we only have things that have been removed after rollback to PG9, |
| # nothing was created in PG12 and lost. |
| shareids12 = idset(conn12, "share") |
| shareids9 = idset(conn9, "share") |
| print("Shares missing in 9:", len(shareids12 - shareids9)) |
| cur12 = conn12.cursor() |
| for id_ in list(shareids12-shareids9): |
| cur12.execute(f"SELECT uid_owner, file_target, stime, share_with FROM oc_share WHERE id = {id_}") |
| uid_owner, file_target, stime, share_with = cur12.fetchone() |
| account = account_by_uid(conn12, uid_owner) |
| stime_human = datetime.utcfromtimestamp(stime).strftime('%Y-%m-%d %H:%M:%S') |
| print(f"Missing share {id_} {file_target} owned by {account}..") |
| if stime < incident_start or stime > incident_end: |
| print(f" Skipping, created at {stime_human}") |
| continue |
| raise Exception("Unhandled.") |
| cur12.close() |
| |
| |
| # Check mounts table. This contains root file storages for each user, but also |
| # incoming shares 'mounted' into a user's account. |
| # From what I cen tell, storage_id/root_id are the source path that's being |
| # mounted (root_id being the fileid inside an oc_filecache, and storage_id |
| # being the storage in which that file is kept), while user_id/mount_point are |
| # the mount destination (ie. path into which this is mounted for a user's |
| # view). |
| # |
| # RESULT: we only have share-mounts missing for a handful of users. We choose |
| # to ignore it, as we assume next time these users log in they will get the |
| # mounts again. |
| # TODO(q3k): verify this |
| mounts12 = valset(conn12, "mounts", ["storage_id", "root_id", "user_id", "mount_point"]) |
| mounts9 = valset(conn9, "mounts", ["storage_id", "root_id", "user_id", "mount_point"]) |
| print("Mounts missing in 9:", len(mounts12 - mounts9)) |
| # Mounts that appearify normally whenever you log into owncloud, as they are the result of shares': |
| mount_names_ok = set(["2020-03-26_covid_templar", "camera", "Public Shaming", "przylbice.md", "Test.txt", "covid"]) |
| # Mounts that used to be from a share that existed, but has been since deleted in PG9. |
| mount_names_ok |= set(["Covid-instrukcje", "Chaos_modele_covid", "Covid_proces_presspack"]) |
| mounts_sorted = [] |
| for m in list(mounts12 - mounts9): |
| storage_id, root_id, user_id, mount_point = m.split(';;;') |
| mounts_sorted.append((storage_id, root_id, user_id, mount_point)) |
| mounts_sorted = sorted(mounts_sorted, key=lambda el: el[2]) |
| for storage_id, root_id, user_id, mount_point in mounts_sorted: |
| assert mount_point.startswith("/" + user_id + "/") |
| mount_point = mount_point[len(user_id)+1:] |
| account = account_by_uid(conn12, user_id) |
| print(f"Missing mount {mount_point}, storage ID {storage_id}, owned by {account}..") |
| storage_owner = storage_owner_by_id(conn12, storage_id) |
| print(f" Storage owner: {storage_owner}") |
| |
| parts = mount_point.split('/') |
| if len(parts) == 4 and parts[0] == '' and parts[1] == 'files' and parts[2] in mount_names_ok and parts[3] == '': |
| print(" Skipping, known okay") |
| continue |
| raise Exception("Unhandled") |
| |